# Set Up & Basic Analysis ----

# Attach readxl, installing it first if it is not already available.
if (!require("readxl")) {
  install.packages("readxl")
  library(readxl)
}
## Loading required package: readxl
# Attach the tidyverse, installing it first if it is not already available.
if (!require("tidyverse")) {
  install.packages("tidyverse")
  library(tidyverse)
}
## Loading required package: tidyverse
## -- Attaching packages ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## Warning: package 'stringr' was built under R version 4.0.3
## -- Conflicts -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Attach tinytex, installing both the R package and the TinyTeX LaTeX
# distribution on first run. BUG FIX: the original wrote
# `tinytex::install_tinytex;` (no parentheses), which only references the
# function and never calls it, so TinyTeX was never installed.
if (!require("tinytex")) {
  install.packages("tinytex")
  tinytex::install_tinytex()
  library(tinytex)
}
## Loading required package: tinytex
# Attach igraph, installing it first if it is not already available.
if (!require("igraph")) {
  install.packages("igraph")
  library(igraph)
}
## Loading required package: igraph
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
## 
##     compose, simplify
## The following object is masked from 'package:tidyr':
## 
##     crossing
## The following object is masked from 'package:tibble':
## 
##     as_data_frame
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
# Attach rlist, installing it first if it is not already available.
if (!require("rlist")) {
  install.packages("rlist")
  library(rlist)
}
## Loading required package: rlist
## Warning: package 'rlist' was built under R version 4.0.3
# Attach matrixStats, installing it first if it is not already available.
if (!require("matrixStats")) {
  install.packages("matrixStats")
  library(matrixStats)
}
## Loading required package: matrixStats
## Warning: package 'matrixStats' was built under R version 4.0.3
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
# Attach randnet, installing it first if it is not already available.
if (!require("randnet")) {
  install.packages("randnet")
  library(randnet)
}
## Loading required package: randnet
## Warning: package 'randnet' was built under R version 4.0.3
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loading required package: entropy
## Warning: package 'entropy' was built under R version 4.0.3
## Loading required package: AUC
## Warning: package 'AUC' was built under R version 4.0.3
## AUC 0.3.0
## Type AUCNews() to see the change log and ?AUC to get an overview.
# Attach lsm, installing it first if it is not already available.
if (!require("lsm")) {
  install.packages("lsm")
  library(lsm)
}
## Loading required package: lsm
## Warning: package 'lsm' was built under R version 4.0.3
# Attach fields, installing it first if it is not already available.
if (!require("fields")) {
  install.packages("fields")
  library(fields)
}
## Loading required package: fields
## Warning: package 'fields' was built under R version 4.0.3
## Loading required package: spam
## Warning: package 'spam' was built under R version 4.0.3
## Loading required package: dotCall64
## Warning: package 'dotCall64' was built under R version 4.0.3
## Loading required package: grid
## Spam version 2.5-1 (2019-12-12) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction 
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
## 
## Attaching package: 'spam'
## The following object is masked from 'package:Matrix':
## 
##     det
## The following objects are masked from 'package:base':
## 
##     backsolve, forwardsolve
## See https://github.com/NCAR/Fields for
##  an extensive vignette, other supplements and source code
# Attach ggthemes, installing it first if it is not already available.
if (!require("ggthemes")) {
  install.packages("ggthemes")
  library(ggthemes)
}
## Loading required package: ggthemes
## Warning: package 'ggthemes' was built under R version 4.0.3
# Attach latentnet, installing it first if it is not already available.
if (!require("latentnet")) {
  install.packages("latentnet")
  library(latentnet)
}
## Loading required package: latentnet
## Warning: package 'latentnet' was built under R version 4.0.3
## Loading required package: network
## Warning: package 'network' was built under R version 4.0.3
## network: Classes for Relational Data
## Version 1.16.1 created on 2020-10-06.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##                     Mark S. Handcock, University of California -- Los Angeles
##                     David R. Hunter, Penn State University
##                     Martina Morris, University of Washington
##                     Skye Bender-deMoll, University of Washington
##  For citation information, type citation("network").
##  Type help("network-package") to get started.
## 
## Attaching package: 'network'
## The following objects are masked from 'package:igraph':
## 
##     %c%, %s%, add.edges, add.vertices, delete.edges, delete.vertices,
##     get.edge.attribute, get.edges, get.vertex.attribute, is.bipartite,
##     is.directed, list.edge.attributes, list.vertex.attributes,
##     set.edge.attribute, set.vertex.attribute
## Loading required package: ergm
## Warning: package 'ergm' was built under R version 4.0.3
## 
## ergm: version 3.11.0, created on 2020-10-14
## Copyright (c) 2020, Mark S. Handcock, University of California -- Los Angeles
##                     David R. Hunter, Penn State University
##                     Carter T. Butts, University of California -- Irvine
##                     Steven M. Goodreau, University of Washington
##                     Pavel N. Krivitsky, UNSW Sydney
##                     Martina Morris, University of Washington
##                     with contributions from
##                     Li Wang
##                     Kirk Li, University of Washington
##                     Skye Bender-deMoll, University of Washington
##                     Chad Klumb
##                     Michal Bojanowski, Kozminski University
##                     Ben Bolker
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("ergm").
## NOTE: Versions before 3.6.1 had a bug in the implementation of the bd()
## constraint which distorted the sampled distribution somewhat. In
## addition, Sampson's Monks datasets had mislabeled vertices. See the
## NEWS and the documentation for more details.
## NOTE: Some common term arguments pertaining to vertex attribute and
## level selection have changed in 3.10.0. See terms help for more
## details. Use 'options(ergm.term=list(version="3.9.4"))' to use old
## behavior.
## 
## latentnet: version 2.10.5, created on 2020-03-20
## Copyright (c) 2020, Pavel N. Krivitsky, UNSW Sydney
##                     Mark S. Handcock, University of California -- Los Angeles
##                     with contributions from
##                     Susan M. Shortreed
##                     Jeremy Tantrum
##                     Peter D. Hoff
##                     Li Wang
##                     Kirk Li, University of Washington
##                     Jake Fisher
##                     Jordan T. Bates
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("latentnet").
## NOTE: BIC calculation prior to latentnet 2.7.0 had a bug in the calculation of the effective number of parameters. See help(summary.ergmm) for details.
## NOTE: Prior to version 2.8.0, handling of fixed effects for directed networks had a bug. See help("ergmm-terms") for details.
# Read in Data for one tab
data <- read_excel("a_sign_of_the_times.xlsx", sheet = 45, col_names = FALSE)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
# Rename the first column to 'Covariates'
names(data)[names(data) == "...1"] <- "Covariates"

# Split the Covariates column into Name, Party, and State. Labels look like
# "Name, I. (ST-P)", so the final 6 characters hold "(ST-P)".
data <- data %>%
  separate(Covariates, c("Name", "State_Party"), sep = -6, remove = FALSE) %>%
  separate(State_Party, c("State", "Party"), sep = -3)

# Pull out just the single-letter Party abbreviation from the string
data$Party <- str_sub(data$Party, -2, -2)

# Pull out just the two-letter State abbreviation from the string
data$State <- str_sub(data$State, -2, -1)

# Create a new dataframe called covariates with the Name, State, and Party
covariates <- data[, 1:4]

# The remaining columns form the adjacency matrix. FIX: convert to a matrix
# BEFORE assigning row/column names — setting row names on a tibble is
# deprecated and was emitting a warning. dimnames() on the matrix gives the
# same labelled matrix without the warning.
network <- as.matrix(data[, 5:ncol(data)])
dimnames(network) <- list(covariates$Covariates, covariates$Covariates)

# Turn the matrix into a weighted and undirected igraph object - NOTE: the -1
# value is used as a weight, but is difficult to handle for some calculations
# such as betweenness.
g <- graph_from_adjacency_matrix(network, weighted = TRUE, mode = 'undirected')

# Add the number of negative relationships to covariates (numeric -1 compares
# equal whether the matrix read in as numeric or character).
covariates$Negative <- rowSums(network == -1, na.rm = TRUE)

# Add the number of positive relationships to covariates
covariates$Positive <- rowSums(network == 1, na.rm = TRUE)

# Quick check on total and mean positive/negative tie counts by party.
covariates %>%
  group_by(Party) %>%
  summarize(negs = sum(Negative), pos = sum(Positive))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 3
##   Party  negs   pos
##   <chr> <dbl> <dbl>
## 1 D      2147  1197
## 2 I        99    46
## 3 R      2276  1627
covariates %>%
  group_by(Party) %>%
  summarize(negs = mean(Negative), pos = mean(Positive))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 3
##   Party  negs   pos
##   <chr> <dbl> <dbl>
## 1 D      48.8  27.2
## 2 I      49.5  23  
## 3 R      42.1  30.1
# Attach the covariates to the graph as vertex attributes.
g <- set_vertex_attr(g, "Party", value = covariates$Party)
g <- set_vertex_attr(g, "State", value = covariates$State)

# Color nodes by party via a named lookup (D = blue, R = red, I = purple).
party_colors <- c(D = "blue", R = "red", I = "purple")
V(g)$color <- unname(party_colors[V(g)$Party])
plot(g, layout = layout_nicely, vertex.label = NA)

# If we wish to get rid of the negative weights, we can set the positive
# weights to 2 and negative to 1. Just a bandaid for now.

#network[network == 1] <- 2
#network[network == -1] <- 1
#g2 <- as.undirected(graph_from_adjacency_matrix(network))

# Alternatively, we can create two separate graphs, one for positive
# connections and one for negative connections.

# Positive ties only: copy the network matrix and zero out every -1 entry.
pos_connections <- network
pos_connections[pos_connections == -1] <- 0

# Negative ties only: drop the +1 entries, then recode -1 as +1 so the
# resulting adjacency matrix is a plain 0/1 matrix.
neg_connections <- network
neg_connections[neg_connections == 1] <- 0
neg_connections[neg_connections == -1] <- 1

# Turn the positive matrix into an undirected igraph object and attach the
# Party and State vertex attributes.
g_pos <- as.undirected(graph_from_adjacency_matrix(pos_connections))
g_pos <- set_vertex_attr(g_pos, 'Party', value = covariates$Party)
g_pos <- set_vertex_attr(g_pos, 'State', value = covariates$State)

# Same for the negative matrix.
g_neg <- as.undirected(graph_from_adjacency_matrix(neg_connections))
g_neg <- set_vertex_attr(g_neg, 'Party', value = covariates$Party)
g_neg <- set_vertex_attr(g_neg, 'State', value = covariates$State)
# Color nodes by party for both signed graphs, then map them.
party_colors <- c(D = "blue", R = "red", I = "purple")

# Mapping Negative Connections
V(g_neg)$color <- unname(party_colors[V(g_neg)$Party])
plot(g_neg, layout = layout_nicely, vertex.label = NA, main = 'Negative Connections by Party')

# Mapping Positive Connections
V(g_pos)$color <- unname(party_colors[V(g_pos)$Party])
plot(g_pos, layout = layout_nicely, vertex.label = NA, main = 'Positive Connections by Party')

# Side by Side Comparison of negative and positive connections
par(mfrow = c(1, 2))
plot(g_neg, layout = layout_nicely, vertex.label = NA, main = 'Negative Connections by Party')
plot(g_pos, layout = layout_nicely, vertex.label = NA, main = 'Positive Connections by Party')

# Create party-specific subgraphs for each sign of relationship.
# FIX: induced.subgraph() is the deprecated dot-case igraph name; use
# induced_subgraph() for consistency with the snake_case igraph calls used
# elsewhere in this file. Also fixes the "Positve" typo in the plot titles.
r_pos_g <- induced_subgraph(g_pos, V(g_pos)[Party == 'R'])
d_pos_g <- induced_subgraph(g_pos, V(g_pos)[Party == 'D'])
r_neg_g <- induced_subgraph(g_neg, V(g_neg)[Party == 'R'])
d_neg_g <- induced_subgraph(g_neg, V(g_neg)[Party == 'D'])
par(mfrow = c(2, 2))
plot(r_pos_g, layout = layout_nicely, vertex.label = NA, main = paste0('Positive Relationships - Republicans\nAvg Degree: ', round(mean(degree(r_pos_g)), 0)))
plot(d_pos_g, layout = layout_nicely, vertex.label = NA, main = paste0('Positive Relationships - Democrats\nAvg Degree: ', round(mean(degree(d_pos_g)), 0)))
plot(r_neg_g, layout = layout_nicely, vertex.label = NA, main = paste0('Negative Relationships - Republicans\nAvg Degree: ', round(mean(degree(r_neg_g)), 0)))
plot(d_neg_g, layout = layout_nicely, vertex.label = NA, main = paste0('Negative Relationships - Democrats\nAvg Degree: ', round(mean(degree(d_neg_g)), 0)))

# Graph Properties ----

# Positive-tie graph: degree distribution and summary statistics.
par(mfrow = c(1, 1))
hist(degree(g_pos), xlab = "Degree", main = "Degree Distribution\nSenate, 114th Session")

summary(degree(g_pos))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     4.0    22.0    31.0    28.7    37.0    49.0
sort(degree(g_pos))
##  Murkowski, L. (AK-R)     Corker, B. (TN-R)     Warner, M. (VA-D) 
##                     4                     4                     4 
##    Manchin, J. (WV-D)     Nelson, B. (FL-D)  McCaskill, C. (MO-D) 
##                     4                     6                     7 
##       Reid, H. (NV-D)   Sullivan, D. (AK-R)     Tester, J. (MT-D) 
##                     8                     8                     9 
##       Paul, R. (KY-R)     Carper, T. (DE-D)     Shelby, R. (AL-R) 
##                     9                    10                    11 
##   Heitkamp, H. (ND-D)   Donnelly, J. (IN-D)    Portman, R. (OH-R) 
##                    11                    12                    13 
##     Heller, D. (NV-R)       King, A. (ME-I)      Sasse, B. (NE-R) 
##                    13                    13                    13 
##       Kirk, M. (IL-R)    Collins, S. (ME-R)  Alexander, L. (TN-R) 
##                    14                    15                    16 
##     Bennet, M. (CO-D)     Vitter, D. (LA-R)      Hatch, O. (UT-R) 
##                    19                    20                    21 
##  McConnell, M. (KY-R)        Lee, M. (UT-R)   Grassley, C. (IA-R) 
##                    22                    22                    23 
##      Flake, J. (AZ-R)    Cassidy, B. (LA-R)     Ayotte, K. (NH-R) 
##                    23                    23                    24 
##      Wyden, R. (OR-D)       Burr, R. (NC-R)      Udall, T. (NM-D) 
##                    25                    25                    25 
##    Gardner, C. (CO-R)   Cantwell, M. (WA-D)      Casey, R. (PA-D) 
##                    27                    28                    28 
##   Heinrich, M. (NM-D) Blumenthal, R. (CT-D)      Kaine, T. (VA-D) 
##                    28                    28                    28 
##  Feinstein, D. (CA-D)      Brown, S. (OH-D)       Reed, J. (RI-D) 
##                    29                    29                    30 
##     Markey, E. (MA-D)      Thune, J. (SD-R)       Cruz, T. (TX-R) 
##                    30                    30                    30 
##      Ernst, J. (IA-R)     Durbin, R. (IL-D)     McCain, J. (AZ-R) 
##                    30                    31                    31 
##    Schumer, C. (NY-D) Gillibrand, K. (NY-D)      Rubio, M. (FL-R) 
##                    31                    31                    31 
##     Schatz, B. (HI-D)     Toomey, P. (PA-R) Whitehouse, S. (RI-D) 
##                    31                    32                    32 
##    Merkley, J. (OR-D)     Hoeven, J. (ND-R)    Sanders, B. (VT-I) 
##                    32                    32                    33 
##  Klobuchar, A. (MN-D)    Johnson, R. (WI-R)     Warren, E. (MA-D) 
##                    33                    33                    33 
##     Booker, C. (NJ-D)     Murray, P. (WA-D)    Franken, A. (MN-D) 
##                    33                    34                    34 
##      Boxer, B. (CA-D)      Leahy, P. (VT-D)     Peters, G. (MI-D) 
##                    35                    35                    35 
##   Lankford, J. (OK-R)       Enzi, M. (WY-R)     Cardin, B. (MD-D) 
##                    35                    36                    36 
##  Menv©ndez, R. (NJ-D)      Moran, J. (KS-R)     Hirono, M. (HI-D) 
##                    36                    36                    36 
##    Shaheen, J. (NH-D)     Cornyn, J. (TX-R)    Baldwin, T. (WI-D) 
##                    36                    37                    37 
##     Murphy, C. (CT-D)     Rounds, M. (SD-R)     Graham, L. (SC-R) 
##                    37                    37                    38 
##   Mikulski, B. (MD-D)   Sessions, J. (AL-R)     Tillis, T. (NC-R) 
##                    38                    38                    38 
##      Risch, J. (ID-R)     Cotton, T. (AR-R)      Crapo, M. (ID-R) 
##                    39                    39                    40 
##    Fischer, D. (NE-R)      Coons, C. (DE-D)     Daines, S. (MT-R) 
##                    40                    41                    41 
##   Stabenow, D. (MI-D)     Capito, S. (WV-R)    Cochran, T. (MS-R) 
##                    42                    42                    43 
##     Inhofe, J. (OK-R)      Blunt, R. (MO-R)   Barrasso, J. (WY-R) 
##                    43                    43                    43 
##     Perdue, D. (GA-R)      Coats, D. (IN-R)    Roberts, P. (KS-R) 
##                    43                    44                    45 
##      Scott, T. (SC-R)    Isakson, J. (GA-R)     Wicker, R. (MS-R) 
##                    45                    46                    48 
##    Boozman, J. (AR-R) 
##                    49
# Least and most central senators in the positive-tie graph, under three
# different centrality measures.
head(sort(betweenness(g_pos)))
##    Nelson, B. (FL-D)      Reid, H. (NV-D) Murkowski, L. (AK-R) 
##            0.0000000            0.0000000            0.1250000 
##     Sasse, B. (NE-R)    Corker, B. (TN-R)      Paul, R. (KY-R) 
##            0.2102850            0.2421696            0.2935148
tail(sort(betweenness(g_pos)))
##  Gardner, C. (CO-R) Stabenow, D. (MI-D)  Shaheen, J. (NH-D)  Collins, S. (ME-R) 
##            280.8329            302.0910            509.8155            520.6650 
##    Coons, C. (DE-D)   Ayotte, K. (NH-R) 
##            714.1059            878.1226
head(sort(closeness(g_pos)))
##    Nelson, B. (FL-D)      Reid, H. (NV-D)    Corker, B. (TN-R) 
##          0.003448276          0.003472222          0.003521127 
##     Sasse, B. (NE-R) Murkowski, L. (AK-R)      Paul, R. (KY-R) 
##          0.003623188          0.003649635          0.003649635
tail(sort(closeness(g_pos)))
##   Blunt, R. (MO-R) Collins, S. (ME-R)  Wicker, R. (MS-R) Boozman, J. (AR-R) 
##        0.005235602        0.005347594        0.005376344        0.005405405 
##   Coons, C. (DE-D)  Ayotte, K. (NH-R) 
##        0.005405405        0.005681818
head(sort(eigen_centrality(g_pos)$vector))
##    Warner, M. (VA-D)    Nelson, B. (FL-D) McCaskill, C. (MO-D) 
##          0.003072845          0.004129703          0.005202994 
##      Reid, H. (NV-D)    Carper, T. (DE-D)     Wyden, R. (OR-D) 
##          0.005604192          0.006275486          0.017292443
tail(sort(eigen_centrality(g_pos)$vector))
## Barrasso, J. (WY-R)  Isakson, J. (GA-R)  Roberts, P. (KS-R)    Scott, T. (SC-R) 
##           0.9603429           0.9707034           0.9841938           0.9848756 
##   Wicker, R. (MS-R)  Boozman, J. (AR-R) 
##           0.9953603           1.0000000
# Whole-graph statistics for the positive-tie network.
# FIX: graph.density() and is.connected() are deprecated dot-case igraph
# names; edge_density() / is_connected() are the modern equivalents and match
# the snake_case igraph style used elsewhere in this file.
edge_density(g_pos)
## [1] 0.289899
transitivity(g_pos)
## [1] 0.7749039
diameter(g_pos)
## [1] 5
is_connected(g_pos)
## [1] TRUE
# Negative-tie graph: degree distribution and summary statistics.
hist(degree(g_neg), xlab = "Degree", main = "Degree Distribution\nSenate, 114th Session")

summary(degree(g_neg))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   20.00   42.00   46.00   45.22   51.25   59.00
# Least and most central senators in the negative-tie graph, under three
# different centrality measures.
head(sort(betweenness(g_neg)))
##  Collins, S. (ME-R) Donnelly, J. (IN-D)  Manchin, J. (WV-D)     Kirk, M. (IL-R) 
##            2.125457            2.443340            3.831467            7.053243 
##   Capito, S. (WV-R) Heitkamp, H. (ND-D) 
##            8.032356            8.829110
tail(sort(betweenness(g_neg)))
##     Booker, C. (NJ-D)    Sanders, B. (VT-I)    Merkley, J. (OR-D) 
##              45.54376              52.85209              52.85209 
##     Warren, E. (MA-D) Blumenthal, R. (CT-D)     Markey, E. (MA-D) 
##              52.85209              69.82869              74.64750
head(sort(closeness(g_neg)))
##   Capito, S. (WV-R) Donnelly, J. (IN-D)  Cochran, T. (MS-R)  Collins, S. (ME-R) 
##         0.005524862         0.005617978         0.005649718         0.005649718 
##   Graham, L. (SC-R)  Boozman, J. (AR-R) 
##         0.005649718         0.005882353
tail(sort(closeness(g_neg)))
##     Booker, C. (NJ-D)    Sanders, B. (VT-I)    Merkley, J. (OR-D) 
##           0.006993007           0.007042254           0.007042254 
##     Warren, E. (MA-D) Blumenthal, R. (CT-D)     Markey, E. (MA-D) 
##           0.007042254           0.007142857           0.007194245
head(sort(eigen_centrality(g_neg)$vector))
## Donnelly, J. (IN-D)  Collins, S. (ME-R)  Manchin, J. (WV-D) Heitkamp, H. (ND-D) 
##           0.3921403           0.4135856           0.5520099           0.5606483 
##   Capito, S. (WV-R)   Graham, L. (SC-R) 
##           0.5818628           0.6116328
tail(sort(eigen_centrality(g_neg)$vector))
##     Booker, C. (NJ-D)    Merkley, J. (OR-D)    Sanders, B. (VT-I) 
##             0.9465488             0.9550356             0.9550356 
##     Warren, E. (MA-D) Blumenthal, R. (CT-D)     Markey, E. (MA-D) 
##             0.9550356             0.9910491             1.0000000
# Whole-graph statistics for the negative-tie network.
# FIX: graph.density() and is.connected() are deprecated dot-case igraph
# names; edge_density() / is_connected() are the modern equivalents and match
# the snake_case igraph style used elsewhere in this file.
edge_density(g_neg)
## [1] 0.4567677
transitivity(g_neg)
## [1] 0.04519021
diameter(g_neg)
## [1] 3
is_connected(g_neg)
## [1] TRUE

# Single Network Modeling ----

# Community detection with greedy modularity optimization
# (cluster_fast_greedy), run separately on the positive- and negative-tie
# graphs.
block_pos <- cluster_fast_greedy(g_pos)
block_neg <- cluster_fast_greedy(g_neg)

# Plot the detected clusters side by side, labelling each node with its party
# so we can eyeball how well communities track party lines.
par(mfrow=c(1,2))
plot(block_pos, g_pos, vertex.label = vertex_attr(g_pos)$Party, main = 'Positive Relationship - Clusters')
plot(block_neg, g_neg, vertex.label = vertex_attr(g_neg)$Party, main = 'Negative Relationships- Clusters')

# ESTIMATED PROBABILITIES FOR POSITIVE NETWORKS - BY HAND

# Estimate the stochastic-block-model connection probabilities by hand for
# the positive-tie graph: observed edges divided by possible dyads for every
# pair of communities.
n_membership <- tabulate(block_pos$membership, nbins = 3)

hat_P <- matrix(0, nrow = 3, ncol = 3)
for (i in seq_len(3)) {
  in_i <- block_pos$membership == i
  for (j in seq_len(3)) {
    if (i == j) next
    in_j <- block_pos$membership == j
    # Off-diagonal: edges between communities over n_i * n_j possible pairs.
    hat_P[i, j] <- sum(g_pos[in_i, in_j]) / (n_membership[i] * n_membership[j])
  }
}

# Diagonal: each within-community edge appears twice in the symmetric
# submatrix, so halve the sum before dividing by choose(n_i, 2) dyads.
for (i in seq_len(3)) {
  in_i <- block_pos$membership == i
  hat_P[i, i] <- (sum(g_pos[in_i, in_i]) / 2) / choose(n_membership[i], 2)
}
print(hat_P)
##            [,1]        [,2]        [,3]
## [1,] 0.66666667 0.100628931 0.083333333
## [2,] 0.10062893 0.576923077 0.002572899
## [3,] 0.08333333 0.002572899 0.639534884

# ESTIMATED PROBABILITIES FOR NEGATIVE NETWORKS - BY HAND

# Same by-hand SBM probability estimate, this time for the negative-tie graph.
n_membership <- tabulate(block_neg$membership, nbins = 3)

hat_P <- matrix(0, nrow = 3, ncol = 3)
for (i in seq_len(3)) {
  in_i <- block_neg$membership == i
  for (j in seq_len(3)) {
    if (i == j) next
    in_j <- block_neg$membership == j
    # Off-diagonal: edges between communities over n_i * n_j possible pairs.
    hat_P[i, j] <- sum(g_neg[in_i, in_j]) / (n_membership[i] * n_membership[j])
  }
}

# Diagonal: each within-community edge appears twice in the symmetric
# submatrix, so halve the sum before dividing by choose(n_i, 2) dyads.
for (i in seq_len(3)) {
  in_i <- block_neg$membership == i
  hat_P[i, i] <- (sum(g_neg[in_i, in_i]) / 2) / choose(n_membership[i], 2)
}
print(hat_P)
##           [,1]      [,2]      [,3]
## [1,] 0.5134146 0.4451220 0.4168514
## [2,] 0.4451220 0.6666667 0.4409091
## [3,] 0.4168514 0.4409091 0.4888889
# Maybe we want a degree-corrected stochastic block model: check the degree
# distribution within each detected community.
degree_dist_comm_table <- table(block_pos$membership, degree(g_pos))

# Keep only the degree values that actually occur in each community.
keep_observed <- function(counts) counts[counts > 0]
degree_dist_comm_1 <- keep_observed(degree_dist_comm_table[1, ])
degree_dist_comm_2 <- keep_observed(degree_dist_comm_table[2, ])
degree_dist_comm_3 <- keep_observed(degree_dist_comm_table[3, ])

par(mfrow = c(1, 3))
hist(as.integer(names(degree_dist_comm_1)))
hist(as.integer(names(degree_dist_comm_2)))
hist(as.integer(names(degree_dist_comm_3)))

# Degree Corrected Stochastic Block Model using randnet: https://cran.r-project.org/web/packages/randnet/randnet.pdf

# randnet operates on adjacency matrices rather than igraph objects.
g_pos_adj_matrix <- as_adjacency_matrix(g_pos)
g_neg_adj_matrix <- as_adjacency_matrix(g_neg)

# Fit using the community labels already found by cluster_fast_greedy().
dcsbm_pos <- DCSBM.estimate(g_pos_adj_matrix, block_pos$membership)
dcsbm_neg <- DCSBM.estimate(g_neg_adj_matrix, block_neg$membership)

# Show the estimated block connection matrices. NOTE(review): under the
# degree-corrected parameterization these are rates, not probabilities, which
# is why the entries are not in [0, 1].
dcsbm_pos$B
##        [,1]     [,2]     [,3]
## [1,]  4.001   16.001   11.001
## [2,] 16.001 1590.001    6.001
## [3,] 11.001    6.001 1210.001
dcsbm_neg$B
##         [,1]   [,2]     [,3]
## [1,] 842.001 73.001  940.001
## [2,]  73.001  8.001   97.001
## [3,] 940.001 97.001 1452.001

# ESTIMATED PROBABILITIES FOR POSITIVE NETWORKS - USING SBM.ESTIMATE()

# Fit a plain (non-degree-corrected) SBM with randnet; its B matrix matches
# the by-hand estimate computed above.
sbm_pos <- SBM.estimate(g_pos_adj_matrix, block_pos$membership)
sbm_pos$B
##            [,1]        [,2]        [,3]
## [1,] 0.66666667 0.100628931 0.083333333
## [2,] 0.10062893 0.576923077 0.002572899
## [3,] 0.08333333 0.002572899 0.639534884
# Cross-tabulate party against the communities found by cluster_fast_greedy
# to see how closely communities track party lines.
table(vertex_attr(g_pos)$Party, block_pos$membership)
##    
##      1  2  3
##   D  2  0 42
##   I  0  0  2
##   R  1 53  0
table(vertex_attr(g_neg)$Party, block_neg$membership)
##    
##      1  2  3
##   D 17  2 25
##   I  1  0  1
##   R 23  2 29

# Democrats and Republicans have some slight overlap in the first cluster for positive relationships; let's see who the senators are.

# Senators assigned to the first positive-relationship community.
V(g_pos)$name[block_pos$membership == 1]
## [1] "Collins, S. (ME-R)"  "Donnelly, J. (IN-D)" "Manchin, J. (WV-D)"

# Calculate by hand by creating an edge list and turning it into a dataframe.

# Build annotated edge lists so each edge records the name, state, and party
# of both endpoints. The original repeated the same separate/str_sub pipeline
# four times; it is factored into one helper applied per endpoint column.
el_pos <- as_edgelist(g_pos)
el_neg <- as_edgelist(g_neg)
el_pos_df <- as.data.frame(el_pos)
el_neg_df <- as.data.frame(el_neg)

# Split one endpoint column ("Name, I. (ST-P)") into Name_/State_/Party_
# columns carrying the given suffix ("v1" or "v2"); the original endpoint
# column is kept (remove = FALSE), matching the original column layout.
annotate_endpoint <- function(df, col, suffix) {
  name_col <- paste0("Name_", suffix)
  state_col <- paste0("State_", suffix)
  party_col <- paste0("Party_", suffix)
  df <- df %>%
    separate(all_of(col), c(name_col, "State_Party"), sep = -6, remove = FALSE) %>%
    separate(State_Party, c(state_col, party_col), sep = -3)
  # Keep only the single-letter party and two-letter state abbreviations.
  df[[party_col]] <- str_sub(df[[party_col]], -2, -2)
  df[[state_col]] <- str_sub(df[[state_col]], -2, -1)
  df
}

el_pos_df <- el_pos_df %>% annotate_endpoint("V1", "v1") %>% annotate_endpoint("V2", "v2")
el_neg_df <- el_neg_df %>% annotate_endpoint("V1", "v1") %>% annotate_endpoint("V2", "v2")

# Flag whether each edge links two senators from the same party.
el_pos_df$Same_Party <- el_pos_df$Party_v1 == el_pos_df$Party_v2
el_neg_df$Same_Party <- el_neg_df$Party_v1 == el_neg_df$Party_v2

# Store the same-party flag as an edge attribute to make graphs easier to see.
g_pos <- set_edge_attr(g_pos, 'SameParty', value = el_pos_df$Same_Party)
g_neg <- set_edge_attr(g_neg, 'SameParty', value = el_neg_df$Same_Party)
# Map both signed graphs, coloring nodes by party and edges by whether they
# join two nodes of the same party.
par(mfrow = c(1, 2))

party_colors <- c(D = "blue", R = "red", I = "purple")
edge_colors <- c(`TRUE` = "green", `FALSE` = "black")

V(g_neg)$color <- unname(party_colors[V(g_neg)$Party])
E(g_neg)$color <- unname(edge_colors[as.character(E(g_neg)$SameParty)])
plot(g_neg, layout = layout_nicely, vertex.label = NA, main = 'Negative Connections by Party', vertex.size = 2.5)

legend(x = "topleft", legend = c(TRUE, FALSE), pch = 19, col = c("green", "black"), title = "Same-Party Connection")

V(g_pos)$color <- unname(party_colors[V(g_pos)$Party])
E(g_pos)$color <- unname(edge_colors[as.character(E(g_pos)$SameParty)])
plot(g_pos, layout = layout_nicely, vertex.label = NA, main = 'Positive Connections by Party', vertex.size = 2.5)

# Cross-tabulate endpoint parties for positive edges (rows = first endpoint,
# columns = second endpoint; edge orientation is arbitrary here).
table(el_pos_df$Party_v1, el_pos_df$Party_v2)
##    
##       D   I   R
##   D 567  26   4
##   I  19   0   0
##   R  14   1 804
# Same cross-tabulation for negative edges.
table(el_neg_df$Party_v1, el_neg_df$Party_v2)
##    
##        D    I    R
##   D   25    0 1171
##   I    2    0   38
##   R  924   59   42
# Inspect the positive D-to-R edges individually.
el_pos_df %>%
  filter(Party_v1 == 'D', Party_v2 == 'R')
##                   V1      Name_v1 State_v1 Party_v1                 V2
## 1  Bennet, M. (CO-D)  Bennet, M.        CO        D Gardner, C. (CO-R)
## 2 Shaheen, J. (NH-D) Shaheen, J.        NH        D  Ayotte, K. (NH-R)
## 3   Coons, C. (DE-D)   Coons, C.        DE        D  Ayotte, K. (NH-R)
## 4  Tester, J. (MT-D)  Tester, J.        MT        D  Daines, S. (MT-R)
##        Name_v2 State_v2 Party_v2 Same_Party
## 1 Gardner, C.        CO        R      FALSE
## 2  Ayotte, K.        NH        R      FALSE
## 3  Ayotte, K.        NH        R      FALSE
## 4  Daines, S.        MT        R      FALSE

# Looks like a lot of the positive relationships across political parties come from within-state relationships - two layers of homophily?

# ANOTHER WAY TO EXAMINE RELATIONSHIPS BETWEEN PARTIES IS BY USING ASSORTATIVITY: https://dshizuka.github.io/networkanalysis/05_community.html

# Assortativity by party. The +1 turns the logical into 1s and 2s (not 0s and
# 1s), which assortativity_nominal() needs for its category labels.
# FIX: the original also passed a second `types2` argument, which igraph
# ignores for undirected graphs and warned about ("Only `types1' is used for
# undirected case"); it is dropped here, leaving the result unchanged.
assortativity(g_pos, (V(g_pos)$Party == "D"), directed = FALSE)
## [1] 0.9097115
assortativity_nominal(g_pos, (V(g_pos)$Party == 'R') + 1)
## [1] 0.9730365

# Why does this give us a different value than the proportion table?

#Single Network Modeling Part 2 ——

# Load sheet 45 - the last congress in the workbook (labelled the 114th
# session in the ERGM section below). No header row, so columns get
# placeholder names ...1, ...2, etc.
data <- read_excel("a_sign_of_the_times.xlsx", sheet = 45, col_names = FALSE)
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
# Rename the first column to 'Covariates'
names(data)[names(data) == "...1"] <- "Covariates"

# Split Covariates Column into Name, Party, and State.
# sep = -6 peels off the trailing "(XX-Y)" token (e.g. "(CO-D)"); sep = -3
# then separates the state part "(XX" from "-Y)".
data <-  data %>% separate(Covariates, c("Name","State_Party"), sep = -6, remove = FALSE) %>% separate(State_Party,c("State", "Party"), sep = -3)

# Pull out just the Party abbreviation from the string (second-to-last char of "-Y)")
data$Party <- str_sub(data$Party, -2, -2)

# Pull out just the State abbreviation from the string (last two chars of "(XX")
data$State <- str_sub(data$State, -2,-1)

# Create a new dataframe called covariates with the Name, State, and Party
covariates <- data[, c(1:4)]

# Remove covariates from the adjacency matrix and call it network
network <- data[, c(5:length(data))]

# Turn the network into a matrix, then index both rows and columns by the
# original covariates column (which includes Name, State, and Party).
# Naming the matrix directly via dimnames() avoids the deprecated
# "setting row names on a tibble" assignment that the original code warned on.
network <- as.matrix(network)
dimnames(network) <- list(covariates$Covariates, covariates$Covariates)

# Turn the matrix into a weighted and undirected igraph object - NOTE: the -1
# value is used as a weight, but is difficult to handle for some calculations
# such as betweenness.
g <- graph_from_adjacency_matrix(network, weighted = TRUE, mode = 'undirected')

# Add the number of negative relationships to covariates
covariates$Negative <- rowSums(network == '-1', na.rm=TRUE)

# Add the number of positive relationships to covariates
covariates$Positive <- rowSums(network == '1', na.rm = TRUE)

# Add the covariates as attributes to the graph object
g <- set_vertex_attr(g, 'Party', value = covariates$Party)
g <- set_vertex_attr(g, 'State', value = covariates$State)
# If we wish to get rid of the negative weights, we can set the positive weights to 2 and negative to 1.
# Just a bandaid for now.

#network[network == 1] <- 2
#network[network == -1] <- 1
#g2 <- as.undirected(graph_from_adjacency_matrix(network))

# Alternatively, we can create two separate graphs, one for positive connections and one for negative

# Positive-only copy of the network matrix: zero out every -1 entry.
pos_connections <- network
pos_connections[pos_connections == -1] <- 0

# Negative-only copy: zero out every +1 entry, then flip the -1s to +1.
neg_connections <- network
neg_connections[neg_connections == 1] <- 0
neg_connections[neg_connections == -1] <- 1

# Helper: build an undirected igraph from a 0/1 adjacency matrix and attach
# the Party and State covariates to its vertices.
as_party_graph <- function(adj) {
  g_out <- as.undirected(graph_from_adjacency_matrix(adj))
  g_out <- set_vertex_attr(g_out, 'Party', value = covariates$Party)
  g_out <- set_vertex_attr(g_out, 'State', value = covariates$State)
  g_out
}

g_pos <- as_party_graph(pos_connections)
g_neg <- as_party_graph(neg_connections)

ESTIMATED PROBABILITIES FOR POSITIVE NETWORKS - USING SBM.ESTIMATE()

# Community detection via edge betweenness (Girvan-Newman). The original
# comment said cluster_fast_greedy, but cluster_edge_betweenness is what is
# actually fit here.
block_pos <- cluster_edge_betweenness(g_pos)
# Build the adjacency matrix inline: g_pos_adj_matrix was only defined further
# down the script, so the original call was a forward reference.
sbm_pos <- SBM.estimate(as.matrix(as_adjacency_matrix(g_pos)), block_pos$membership)
# Estimated block-to-block connection probabilities.
sbm_pos$B
##             [,1]        [,2]
## [1,] 0.527922078 0.006899351
## [2,] 0.006899351 0.639534884

ESTIMATED PROBABILITIES FOR POSITIVE NETWORKS - USING latentnet

# Dense adjacency matrices for the positive and negative graphs, plus statnet
# `network` objects built from them for the latentnet / ergm fits below.
g_pos_adj_matrix <- as.matrix(as_adjacency_matrix(g_pos))
g_neg_adj_matrix <- as.matrix(as_adjacency_matrix(g_neg))
g_pos_net <- network(g_pos_adj_matrix, directed = FALSE)
g_neg_net <- network(g_neg_adj_matrix, directed = FALSE)

# Degree distribution of the positive-tie graph.
hist(degree(g_pos), breaks = 50)

# Dyadic covariate matrices: entry (i, j) is 1 when senators i and j share a
# party (Same_party) or a state (Same_Stat), 0 otherwise. Starting from the
# adjacency matrix preserves dimensions and dimnames; the diagonal is left
# untouched, exactly as in the original double loop (which skipped i == j).
# Vectorized with outer() instead of the O(n^2) element-by-element loop.
Same_party <- g_pos_adj_matrix
Same_Stat <- g_pos_adj_matrix
off_diag <- row(Same_party) != col(Same_party)
Same_party[off_diag] <- (outer(covariates$Party, covariates$Party, "==") + 0)[off_diag]
Same_Stat[off_diag] <- (outer(covariates$State, covariates$State, "==") + 0)[off_diag]
# Latent-space model (MLE only): 2 latent dimensions, 2 groups.
result1 <- ergmm(g_pos_net ~ euclidean(d = 2, G = 2), tofit = "mle")

#result1=ergmm(g_pos_net ~ euclidean(d = 2,G=3), tofit="mle")

# result2=ergmm(g_pos_net ~edgecov(Same_party) + euclidean(d = 2,G=2))
# 
# result3=ergmm(g_pos_net ~ edgecov(Same_party) + edgecov(Same_Stat) + euclidean(d = 2,G=2))

# Map each senator's party letter to a plotting color via a named lookup.
oneLcolors <- unname(c(D = 'blue', R = 'red', I = 'purple')[covariates$Party])

plot(result1, what = "mle")

plot(result1, vertex.col = oneLcolors, what = "mle", main = "MLE positions", print.formula = FALSE, labels = FALSE, vertex.cex = 2, xlim = c(-15, 15), ylim = c(-15, 15))

#Single Network Modeling Part 3 ——

ERGM Modeling - 114th session

# Inverse logit: maps a log-odds value to a probability in (0, 1).
# Written as 1 / (1 + exp(-x)) rather than exp(x) / (1 + exp(x)) so that a
# large positive x does not overflow to Inf / Inf = NaN. Equivalent to
# stats::plogis(x).
expit <- function(x) {
  1 / (1 + exp(-x))
}

# Full Network ERGM ####
# Convert the full igraph (positive and negative ties combined) into a statnet
# network object and copy the vertex covariates across.
g.ergm <- network(as.matrix(as_adjacency_matrix(g)))
set.vertex.attribute(g.ergm,"Party",vertex.attributes(g)$Party)
set.vertex.attribute(g.ergm,"State",vertex.attributes(g)$State)
set.vertex.attribute(g.ergm,"Name",vertex.attributes(g)$name)
# Baseline model: density (edges) term only.
M1 <- ergm(g.ergm ~ edges, 
           control = control.ergm(MCMC.burnin = 5000, 
                                  MCMC.interval =  10,
                                  MCMLE.maxit = 10))
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
summary(M1) # Potentially use for the poster
## Call:
## ergm(formula = g.ergm ~ edges, control = control.ergm(MCMC.burnin = 5000, 
##     MCMC.interval = 10, MCMLE.maxit = 10))
## 
## Iterations:  5 out of 10 
## 
## Monte Carlo MLE Results:
##       Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges  1.08091    0.02311      0   46.77   <1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 13724  on 9900  degrees of freedom
##  Residual Deviance: 11206  on 9899  degrees of freedom
##  
## AIC: 11208    BIC: 11215    (Smaller is better.)
# Add party homophily: match("Party") is the nodematch term (see summary below).
M1.1 <- ergm(g.ergm ~ edges + match("Party"), 
           control = control.ergm(MCMC.burnin = 5000, 
                                  MCMC.interval =  10,
                                  MCMLE.maxit = 10))
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
summary(M1.1)
## Call:
## ergm(formula = g.ergm ~ edges + match("Party"), control = control.ergm(MCMC.burnin = 5000, 
##     MCMC.interval = 10, MCMLE.maxit = 10))
## 
## Iterations:  5 out of 10 
## 
## Monte Carlo MLE Results:
##                 Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges            1.97284    0.04259      0   46.33   <1e-04 ***
## nodematch.Party -1.54771    0.05190      0  -29.82   <1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 13724  on 9900  degrees of freedom
##  Residual Deviance: 10201  on 9898  degrees of freedom
##  
## AIC: 10205    BIC: 10219    (Smaller is better.)
# State homophily only.
M1.2 <- ergm(g.ergm ~ edges + match("State"), 
           control = control.ergm(MCMC.burnin = 5000, 
                                  MCMC.interval =  10,
                                  MCMLE.maxit = 10))
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
summary(M1.2)
## Call:
## ergm(formula = g.ergm ~ edges + match("State"), control = control.ergm(MCMC.burnin = 5000, 
##     MCMC.interval = 10, MCMLE.maxit = 10))
## 
## Iterations:  5 out of 10 
## 
## Monte Carlo MLE Results:
##                 Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges            1.07481    0.02319      0  46.345   <1e-04 ***
## nodematch.State  0.74048    0.28913      0   2.561   0.0104 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 13724  on 9900  degrees of freedom
##  Residual Deviance: 11198  on 9898  degrees of freedom
##  
## AIC: 11202    BIC: 11217    (Smaller is better.)
# Both homophily terms (party and state) together.
M1.3 <- ergm(g.ergm ~ edges + match("Party") + match("State"), 
           control = control.ergm(MCMC.burnin = 5000, 
                                  MCMC.interval =  10,
                                  MCMLE.maxit = 10))
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
summary(M1.3)
## Call:
## ergm(formula = g.ergm ~ edges + match("Party") + match("State"), 
##     control = control.ergm(MCMC.burnin = 5000, MCMC.interval = 10, 
##         MCMLE.maxit = 10))
## 
## Iterations:  6 out of 10 
## 
## Monte Carlo MLE Results:
##                 Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges            1.96860    0.04260      0  46.210  < 1e-04 ***
## nodematch.Party -1.55685    0.05197      0 -29.958  < 1e-04 ***
## nodematch.State  1.09830    0.29422      0   3.733 0.000189 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 13724  on 9900  degrees of freedom
##  Residual Deviance: 10183  on 9897  degrees of freedom
##  
## AIC: 10189    BIC: 10211    (Smaller is better.)
# General Positive ####
# Same pipeline as M1, but on the positive-tie-only network.
g_pos.ergm <- network(as.matrix(as_adjacency_matrix(g_pos)))
set.vertex.attribute(g_pos.ergm,"Party",vertex.attributes(g_pos)$Party)
set.vertex.attribute(g_pos.ergm,"State",vertex.attributes(g_pos)$State)
set.vertex.attribute(g_pos.ergm,"Name",vertex.attributes(g_pos)$name)
# Baseline density-only model.
M2 <- ergm(g_pos.ergm ~ edges, 
           control = control.ergm(MCMC.burnin = 5000, 
                                  MCMC.interval =  10,
                                  MCMLE.maxit = 10))
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
summary(M2)  # Potentially use for the poster
## Call:
## ergm(formula = g_pos.ergm ~ edges, control = control.ergm(MCMC.burnin = 5000, 
##     MCMC.interval = 10, MCMLE.maxit = 10))
## 
## Iterations:  4 out of 10 
## 
## Monte Carlo MLE Results:
##       Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges -0.89587    0.02215      0  -40.45   <1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 13724  on 9900  degrees of freedom
##  Residual Deviance: 11921  on 9899  degrees of freedom
##  
## AIC: 11923    BIC: 11930    (Smaller is better.)
# Party homophily on positive ties (strongly positive coefficient below).
M2.1 <- ergm(g_pos.ergm ~ edges + match("Party"), 
           control = control.ergm(MCMC.burnin = 5000, 
                                  MCMC.interval =  10,
                                  MCMLE.maxit = 10))
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
summary(M2.1)
## Call:
## ergm(formula = g_pos.ergm ~ edges + match("Party"), control = control.ergm(MCMC.burnin = 5000, 
##     MCMC.interval = 10, MCMLE.maxit = 10))
## 
## Iterations:  6 out of 10 
## 
## Monte Carlo MLE Results:
##                 Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges           -3.66836    0.08949      0  -40.99   <1e-04 ***
## nodematch.Party  3.97692    0.09418      0   42.23   <1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 13724  on 9900  degrees of freedom
##  Residual Deviance:  7680  on 9898  degrees of freedom
##  
## AIC: 7684    BIC: 7698    (Smaller is better.)
# Convert the log-odds coefficients to edge probabilities:
# element 1 = P(positive tie | different party), element 2 = P(positive tie | same party).
# Use coef() instead of `M2.1$coef`, which only works via partial name
# matching against the `coefficients` component.
M2.1.E.EP <- expit(coef(M2.1)[1])
M2.1.E.EP <- c(M2.1.E.EP, expit(coef(M2.1)[1] + coef(M2.1)[2]))
M2.1.E.EP
##      edges      edges 
## 0.02488336 0.57653490
# State homophily on positive ties.
M2.2 <- ergm(g_pos.ergm ~ edges + match("State"), 
           control = control.ergm(MCMC.burnin = 5000, 
                                  MCMC.interval =  10,
                                  MCMLE.maxit = 10))
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
summary(M2.2)
## Call:
## ergm(formula = g_pos.ergm ~ edges + match("State"), control = control.ergm(MCMC.burnin = 5000, 
##     MCMC.interval = 10, MCMLE.maxit = 10))
## 
## Iterations:  5 out of 10 
## 
## Monte Carlo MLE Results:
##                 Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges           -0.92330    0.02239      0 -41.229   <1e-04 ***
## nodematch.State  2.58153    0.27369      0   9.432   <1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 13724  on 9900  degrees of freedom
##  Residual Deviance: 11788  on 9898  degrees of freedom
##  
## AIC: 11792    BIC: 11807    (Smaller is better.)
# Both homophily terms on positive ties.
M2.3 <- ergm(g_pos.ergm ~ edges + match("Party") + match("State"), 
           control = control.ergm(MCMC.burnin = 5000, 
                                  MCMC.interval =  10,
                                  MCMLE.maxit = 10))
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
summary(M2.3)  # Potentially use for the poster
## Call:
## ergm(formula = g_pos.ergm ~ edges + match("Party") + match("State"), 
##     control = control.ergm(MCMC.burnin = 5000, MCMC.interval = 10, 
##         MCMLE.maxit = 10))
## 
## Iterations:  6 out of 10 
## 
## Monte Carlo MLE Results:
##                 Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges           -3.79134    0.09477      0  -40.01   <1e-04 ***
## nodematch.Party  4.07504    0.09918      0   41.09   <1e-04 ***
## nodematch.State  3.81096    0.35030      0   10.88   <1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 13724  on 9900  degrees of freedom
##  Residual Deviance:  7540  on 9897  degrees of freedom
##  
## AIC: 7546    BIC: 7568    (Smaller is better.)
# General Negative ####
# Same pipeline on the negative-tie-only network.
g_neg.ergm <- network(as.matrix(as_adjacency_matrix(g_neg)))
set.vertex.attribute(g_neg.ergm,"Party",vertex.attributes(g_neg)$Party)
set.vertex.attribute(g_neg.ergm,"State",vertex.attributes(g_neg)$State)
set.vertex.attribute(g_neg.ergm,"Name",vertex.attributes(g_neg)$name)
# Baseline density-only model.
M3 <- ergm(g_neg.ergm ~ edges, 
           control = control.ergm(MCMC.burnin = 5000, 
                                  MCMC.interval =  10,
                                  MCMLE.maxit = 10))
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
summary(M3)  # Potentially use for the poster
## Call:
## ergm(formula = g_neg.ergm ~ edges, control = control.ergm(MCMC.burnin = 5000, 
##     MCMC.interval = 10, MCMLE.maxit = 10))
## 
## Iterations:  3 out of 10 
## 
## Monte Carlo MLE Results:
##       Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges -0.17336    0.02018      0  -8.592   <1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 13724  on 9900  degrees of freedom
##  Residual Deviance: 13650  on 9899  degrees of freedom
##  
## AIC: 13652    BIC: 13659    (Smaller is better.)
# Party homophily on negative ties (strongly negative coefficient below -
# same-party pairs are much less likely to have a negative tie).
M3.1 <- ergm(g_neg.ergm ~ edges + match("Party"), 
           control = control.ergm(MCMC.burnin = 5000, 
                                  MCMC.interval =  10,
                                  MCMLE.maxit = 10))
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
summary(M3.1)
## Call:
## ergm(formula = g_neg.ergm ~ edges + match("Party"), control = control.ergm(MCMC.burnin = 5000, 
##     MCMC.interval = 10, MCMLE.maxit = 10))
## 
## Iterations:  6 out of 10 
## 
## Monte Carlo MLE Results:
##                 Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges            1.75859    0.03938      0   44.66   <1e-04 ***
## nodematch.Party -5.29933    0.09606      0  -55.17   <1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 13724  on 9900  degrees of freedom
##  Residual Deviance:  5515  on 9898  degrees of freedom
##  
## AIC: 5519    BIC: 5534    (Smaller is better.)
# State homophily on negative ties.
M3.2 <- ergm(g_neg.ergm ~ edges + match("State"), 
           control = control.ergm(MCMC.burnin = 5000, 
                                  MCMC.interval =  10,
                                  MCMLE.maxit = 10))
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
summary(M3.2)
## Call:
## ergm(formula = g_neg.ergm ~ edges + match("State"), control = control.ergm(MCMC.burnin = 5000, 
##     MCMC.interval = 10, MCMLE.maxit = 10))
## 
## Iterations:  5 out of 10 
## 
## Monte Carlo MLE Results:
##                 Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges           -0.15541    0.02026      0  -7.669   <1e-04 ***
## nodematch.State -3.73641    0.71438      0  -5.230   <1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 13724  on 9900  degrees of freedom
##  Residual Deviance: 13546  on 9898  degrees of freedom
##  
## AIC: 13550    BIC: 13565    (Smaller is better.)
# Both homophily terms on negative ties.
M3.3 <- ergm(g_neg.ergm ~ edges + match("Party") + match("State"), 
           control = control.ergm(MCMC.burnin = 5000, 
                                  MCMC.interval =  10,
                                  MCMLE.maxit = 10))
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
summary(M3.3)  # Potentially use for the poster
## Call:
## ergm(formula = g_neg.ergm ~ edges + match("Party") + match("State"), 
##     control = control.ergm(MCMC.burnin = 5000, MCMC.interval = 10, 
##         MCMLE.maxit = 10))
## 
## Iterations:  6 out of 10 
## 
## Monte Carlo MLE Results:
##                 Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges            1.79591    0.04002      0  44.875   <1e-04 ***
## nodematch.Party -5.32158    0.09635      0 -55.233   <1e-04 ***
## nodematch.State -4.44795    0.73237      0  -6.073   <1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 13724  on 9900  degrees of freedom
##  Residual Deviance:  5417  on 9897  degrees of freedom
##  
## AIC: 5423    BIC: 5445    (Smaller is better.)
# Pos. Democratic ERGM ####
# NOTE(review): d_pos_g is defined elsewhere in the file (outside this
# excerpt); presumably the positive-tie subgraph of Democratic senators -
# confirm against the earlier subgraph set-up.
g_pos_dem.ergm <- network(as.matrix(as_adjacency_matrix(d_pos_g)))
set.vertex.attribute(g_pos_dem.ergm,"State",vertex.attributes(d_pos_g)$State)
# Density-only baseline.
M4 <- ergm(g_pos_dem.ergm ~ edges, 
           control = control.ergm(MCMC.burnin = 5000, 
                                  MCMC.interval =  10,
                                  MCMLE.maxit = 10))
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
summary(M4)
## Call:
## ergm(formula = g_pos_dem.ergm ~ edges, control = control.ergm(MCMC.burnin = 5000, 
##     MCMC.interval = 10, MCMLE.maxit = 10))
## 
## Iterations:  4 out of 10 
## 
## Monte Carlo MLE Results:
##       Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges  0.40282    0.04692      0   8.586   <1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 2623  on 1892  degrees of freedom
##  Residual Deviance: 2548  on 1891  degrees of freedom
##  
## AIC: 2550    BIC: 2555    (Smaller is better.)
# Neg. Democratic ERGM ####
# NOTE(review): d_neg_g is defined elsewhere in the file (outside this
# excerpt); presumably the negative-tie subgraph of Democratic senators.
g_neg_dem.ergm <- network(as.matrix(as_adjacency_matrix(d_neg_g)))
set.vertex.attribute(g_neg_dem.ergm,"State",vertex.attributes(d_neg_g)$State)
# Density-only baseline.
M5 <- ergm(g_neg_dem.ergm ~ edges, 
           control = control.ergm(MCMC.burnin = 5000, 
                                  MCMC.interval =  10,
                                  MCMLE.maxit = 10))
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
summary(M5)
## Call:
## ergm(formula = g_neg_dem.ergm ~ edges, control = control.ergm(MCMC.burnin = 5000, 
##     MCMC.interval = 10, MCMLE.maxit = 10))
## 
## Iterations:  6 out of 10 
## 
## Monte Carlo MLE Results:
##       Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges  -3.6066     0.1433      0  -25.17   <1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 2623  on 1892  degrees of freedom
##  Residual Deviance:  462  on 1891  degrees of freedom
##  
## AIC: 464    BIC: 469.5    (Smaller is better.)
# Pos. Republican ERGM ####
# NOTE(review): r_pos_g is defined elsewhere in the file (outside this
# excerpt); presumably the positive-tie subgraph of Republican senators.
g_pos_rep.ergm <- network(as.matrix(as_adjacency_matrix(r_pos_g)))
set.vertex.attribute(g_pos_rep.ergm,"State",vertex.attributes(r_pos_g)$State)
# Density-only baseline.
M6 <- ergm(g_pos_rep.ergm ~ edges, 
           control = control.ergm(MCMC.burnin = 5000, 
                                  MCMC.interval =  10,
                                  MCMLE.maxit = 10))
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
summary(M6)
## Call:
## ergm(formula = g_pos_rep.ergm ~ edges, control = control.ergm(MCMC.burnin = 5000, 
##     MCMC.interval = 10, MCMLE.maxit = 10))
## 
## Iterations:  4 out of 10 
## 
## Monte Carlo MLE Results:
##       Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges  0.24865    0.03767      0     6.6   <1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 3968  on 2862  degrees of freedom
##  Residual Deviance: 3924  on 2861  degrees of freedom
##  
## AIC: 3926    BIC: 3932    (Smaller is better.)
# Neg. Republican ERGM ####
# NOTE(review): r_neg_g is defined elsewhere in the file (outside this
# excerpt); presumably the negative-tie subgraph of Republican senators.
g_neg_rep.ergm <- network(as.matrix(as_adjacency_matrix(r_neg_g)))
set.vertex.attribute(g_neg_rep.ergm,"State",vertex.attributes(r_neg_g)$State)
# Density-only baseline.
M7 <- ergm(g_neg_rep.ergm ~ edges, 
           control = control.ergm(MCMC.burnin = 5000, 
                                  MCMC.interval =  10,
                                  MCMLE.maxit = 10))
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
summary(M7)
## Call:
## ergm(formula = g_neg_rep.ergm ~ edges, control = control.ergm(MCMC.burnin = 5000, 
##     MCMC.interval = 10, MCMLE.maxit = 10))
## 
## Iterations:  6 out of 10 
## 
## Monte Carlo MLE Results:
##       Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges  -3.4987     0.1107      0  -31.59   <1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 3967.6  on 2862  degrees of freedom
##  Residual Deviance:  758.3  on 2861  degrees of freedom
##  
## AIC: 760.3    BIC: 766.3    (Smaller is better.)

#Longitudinal Set Up & Basic Analysis ——

# Read in Data from each tab and store a dataframe in a list so df is a list of dataframes.
# Preallocate the list rather than growing it from c()/NULL inside the loop.
n_sheets <- 45
df <- vector("list", n_sheets)
for (i in seq_len(n_sheets)){
  df[[i]] <- read_excel("a_sign_of_the_times.xlsx", sheet = i, col_names = FALSE)
}
## New names:
## * `` -> ...1
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
# Remove the first element from the list because the first tab is a read_me page and does not contain data
df <- df[-1]
# For each dataframe, rename the first column to 'Covariates', split Covariates Column into Name, Party, and State,
# and pull out just the Party and State abbreviations from the new columns
for (i in seq_along(df)){
  names(df[[i]])[names(df[[i]]) == "...1"] <- "Covariates"
  
  # sep = -6 peels off the trailing "(XX-Y)" token; sep = -3 then separates the
  # state part from "-Y)", mirroring the single-sheet code earlier in the file.
  df[[i]] <- df[[i]] %>% separate(Covariates, c("Name","State_Party"), sep = -6, remove = FALSE) %>% separate(State_Party,c("State",    "Party"), sep = -3)
  
  # Keep only the party letter (second-to-last character, e.g. "D" from "-D)").
  df[[i]]$Party <- str_sub(df[[i]]$Party, -2, -2)
  
  # Keep only the two-letter state abbreviation.
  df[[i]]$State <- str_sub(df[[i]]$State, -2, -1)
}
# Create a new list of dataframes called covariates_dfs holding the first four
# covariate columns (Covariates, Name, State, Party) of each congress, and a
# parallel list called networks_dfs holding the remaining adjacency columns.
# lapply returns a preallocated list directly - no need to grow c() vectors
# element-by-element inside a loop.
covariates_dfs <- lapply(df, function(d) d[, c(1:4)])

networks_dfs <- lapply(df, function(d) d[, c(5:length(d))])
# Duplicate names for a couple of congresses so I need to tweak them since you can't have duplicate record IDs

# Jean Ashbrook succeeded John Ashbrook mid-session, producing a duplicate label.
#https://en.wikipedia.org/wiki/97th_United_States_Congress
covariates_dfs[[5]]$Covariates[[94]] <- 'Ashbrook, Jean. (OH-R)'

# Donald Payne Jr. succeeded Donald Payne Sr. mid-session, producing a duplicate label.
#https://en.wikipedia.org/wiki/112th_United_States_Congress
covariates_dfs[[20]]$Covariates[[450]] <- 'Payne, D Jr. (NJ-D)'
# Loop through the networks_dfs and index (both row and column) by the original covariates column which includes Name, State, and Party
for (i in 1:44){
  row.names(networks_dfs[[i]]) <- covariates_dfs[[i]]$Covariates
  names(networks_dfs[[i]]) <- covariates_dfs[[i]]$Covariates
}
## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.
# Turn each network data frame into a weighted, undirected igraph object.
# Preallocate the list rather than growing it from c().
networks <- vector("list", length(networks_dfs))

for (i in seq_along(networks_dfs)){
  networks[[i]] <- graph_from_adjacency_matrix(as.matrix(networks_dfs[[i]]), weighted = TRUE, mode = 'undirected')
}
# Build signed sub-networks for every session: one copy keeping only the
# positive (+1) ties, one keeping only the negative (-1) ties recoded to +1,
# so both end up as 0/1 matrices.

# Positive copy: zero out every -1 entry
pos_networks <- lapply(networks_dfs, function(mat) {
  mat[mat == -1] <- 0
  mat
})

# Negative copy: zero out every +1 entry, then flip -1 to +1
neg_networks <- lapply(networks_dfs, function(mat) {
  mat[mat == 1] <- 0
  mat[mat == -1] <- 1
  mat
})
# Convert the signed matrices into undirected igraph objects and attach the
# Party and State vertex attributes from the matching covariates table.
g_pos_networks <- list()
g_neg_networks <- list()

for (k in seq_along(pos_networks)){
  graph_k <- as.undirected(graph_from_adjacency_matrix(as.matrix(pos_networks[[k]])))
  graph_k <- set_vertex_attr(graph_k, 'Party', value = covariates_dfs[[k]]$Party)
  g_pos_networks[[k]] <- set_vertex_attr(graph_k, 'State', value = covariates_dfs[[k]]$State)
}

for (k in seq_along(neg_networks)){
  graph_k <- as.undirected(graph_from_adjacency_matrix(as.matrix(neg_networks[[k]])))
  graph_k <- set_vertex_attr(graph_k, 'Party', value = covariates_dfs[[k]]$Party)
  g_neg_networks[[k]] <- set_vertex_attr(graph_k, 'State', value = covariates_dfs[[k]]$State)
}
# Color the 114th-session negative network's senators by party, then plot it.
party_colors <- c(D = "blue", R = "red", I = "purple")
for (party in names(party_colors)) {
  V(g_neg_networks[[44]])[Party == party]$color <- party_colors[[party]]
}

plot(g_neg_networks[[44]], layout = layout_nicely, vertex.label = NA, main = 'Negative Connections by Party')

# Build an edge-level dataframe for the 114th-session positive network so each
# edge can be labeled by the parties of its two endpoints.
temp <- data.frame(as_edgelist(g_pos_networks[[44]]))
# X1/X2 are the endpoint labels ("Name (XX-Y)"); the same sep = -6 / sep = -3
# splitting used when cleaning the raw data recovers state and party.
temp <- temp %>% separate(X1, c("Name_v1","State_Party"), sep = -6, remove = FALSE) %>% separate(State_Party,c("State_v1", "Party_v1"), sep = -3)
temp <- temp %>% separate(X2, c("Name_v2","State_Party"), sep = -6, remove = FALSE) %>% separate(State_Party,c("State_v2", "Party_v2"), sep = -3)
temp$Party_v1 <- str_sub(temp$Party_v1, -2, -2)
temp$Party_v2 <- str_sub(temp$Party_v2, -2, -2)
temp$State_v1 <- str_sub(temp$State_v1, -2,-1)
temp$State_v2 <- str_sub(temp$State_v2, -2,-1)
# e.g. "D R" for a Democrat-Republican edge (order follows edgelist order)
temp$PartyCombo <- paste(temp$Party_v1, temp$Party_v2)
# NOTE(review): attribute name 'ConectType' is misspelled; kept as-is since
# renaming it could break any later lookups of that attribute.
g_pos_networks[[44]] <- set_edge_attr(g_pos_networks[[44]], 'ConectType', value = temp$PartyCombo)

# Edge colors: same-party edges blue/red, cross-party purple, edges touching
# an Independent grey.
# NOTE(review): the 'I R' ordering is not handled and would produce NA --
# confirm no edge appears in that order in this session.
temp <- temp %>% mutate('Col' = case_when(PartyCombo == 'D D' ~ 'blue',
                                          PartyCombo == 'R R' ~ 'red',
                                          PartyCombo == 'D R' ~ 'purple',
                                          PartyCombo == 'R D' ~ 'purple',
                                          PartyCombo == 'D I' ~ 'grey',
                                          PartyCombo == 'I D' ~ 'grey',
                                          PartyCombo == 'R I' ~ 'grey'))

g_pos_networks[[44]] <- set_edge_attr(g_pos_networks[[44]], 'Col', value = temp$Col)

# Vertex colors by party
V(g_pos_networks[[44]])[Party == 'D']$color <- "blue"
V(g_pos_networks[[44]])[Party == 'R']$color <- "red"
V(g_pos_networks[[44]])[Party == 'I']$color <- "purple"
# Rescale a numeric vector to [0, 1] (min -> 0, max -> 1).
#
# x     numeric vector to rescale
# na.rm passed through to min()/max(); TRUE lets vectors containing NA still
#       be scaled (NA entries stay NA). Default FALSE preserves the original
#       behavior, where any NA made the whole result NA.
# A constant vector yields NaN (0/0), unchanged from the original.
range01 <- function(x, na.rm = FALSE){
  lo <- min(x, na.rm = na.rm)
  hi <- max(x, na.rm = na.rm)
  (x - lo) / (hi - lo)
}

# Size vertices by normalized betweenness centrality; the +.25 offset and *7.5
# scale keep even zero-betweenness nodes visible (sizes roughly 1.9 to 9.4).
V(g_pos_networks[[44]])$size <- (range01(betweenness(g_pos_networks[[44]]))+.25)*7.5

# Edge colors come from the 'Col' attribute set above; alpha softens overplotting.
plot(g_pos_networks[[44]], vertex.label = NA, layout = layout.fruchterman.reingold, main = 'Positive Connections among U.S. Senators\n 114th Congressional Session', edge.color = adjustcolor(E(g_pos_networks[[44]])$Col, alpha.f = .5))

# NOTE(review): this repeats, verbatim, the edge-attribute construction done
# just above for g_pos_networks[[44]]; the results are identical, so one of
# the two copies could be removed.
temp <- data.frame(as_edgelist(g_pos_networks[[44]]))
temp <- temp %>% separate(X1, c("Name_v1","State_Party"), sep = -6, remove = FALSE) %>% separate(State_Party,c("State_v1", "Party_v1"), sep = -3)
temp <- temp %>% separate(X2, c("Name_v2","State_Party"), sep = -6, remove = FALSE) %>% separate(State_Party,c("State_v2", "Party_v2"), sep = -3)
temp$Party_v1 <- str_sub(temp$Party_v1, -2, -2)
temp$Party_v2 <- str_sub(temp$Party_v2, -2, -2)
temp$State_v1 <- str_sub(temp$State_v1, -2,-1)
temp$State_v2 <- str_sub(temp$State_v2, -2,-1)
temp$PartyCombo <- paste(temp$Party_v1, temp$Party_v2)
g_pos_networks[[44]] <- set_edge_attr(g_pos_networks[[44]], 'ConectType', value = temp$PartyCombo)

temp <- temp %>% mutate('Col' = case_when(PartyCombo == 'D D' ~ 'blue',
                                          PartyCombo == 'R R' ~ 'red',
                                          PartyCombo == 'D R' ~ 'purple',
                                          PartyCombo == 'R D' ~ 'purple',
                                          PartyCombo == 'D I' ~ 'grey',
                                          PartyCombo == 'I D' ~ 'grey',
                                          PartyCombo == 'R I' ~ 'grey'))

g_pos_networks[[44]] <- set_edge_attr(g_pos_networks[[44]], 'Col', value = temp$Col)

V(g_pos_networks[[44]])[Party == 'D']$color <- "blue"
V(g_pos_networks[[44]])[Party == 'R']$color <- "red"
V(g_pos_networks[[44]])[Party == 'I']$color <- "purple"


# Very faint edges (alpha .15) and tiny vertices to show overall structure
plot(g_pos_networks[[44]], vertex.label = NA, layout = layout.fruchterman.reingold, vertex.size = 1, main = 'Positive Connections by Party', edge.color = adjustcolor(E(g_pos_networks[[44]])$Col, alpha.f = .15))

# Same party coloring for the 104th-session (index 34) positive network
V(g_pos_networks[[34]])[Party == 'D']$color <- "blue"
V(g_pos_networks[[34]])[Party == 'R']$color <- "red"
V(g_pos_networks[[34]])[Party == 'I']$color <- "purple"
plot(g_pos_networks[[34]], layout = layout_nicely, vertex.label = NA, main = 'Positive Connections by Party')

GENERAL NETWORK PROPERTIES

COMPUTE DEGREE AND ASSORTATIVITY MEASURES OVER TIME BROKEN DOWN BY PARTY

# Per-session degree vectors (split by party) and per-session party
# assortativity, for positive and negative networks.
# degree(graph, v) with a logical vector returns degrees of the selected
# vertices only; assortativity_nominal() wants positive integer types, hence
# the +1 turning the TRUE/FALSE party indicator into 2/1.
# Lists are preallocated instead of grown from c().
pos_degree_r <- vector("list", length(g_pos_networks))
pos_degree_d <- vector("list", length(g_pos_networks))
neg_degree_r <- vector("list", length(g_neg_networks))
neg_degree_d <- vector("list", length(g_neg_networks))
pos_assort_r <- vector("list", length(g_pos_networks))
pos_assort_d <- vector("list", length(g_pos_networks))
neg_assort_r <- vector("list", length(g_neg_networks))
neg_assort_d <- vector("list", length(g_neg_networks))

for (i in seq_along(g_pos_networks)){
  pos_degree_r[[i]] <- degree(g_pos_networks[[i]], (V(g_pos_networks[[i]])$Party == 'R'))
  pos_degree_d[[i]] <- degree(g_pos_networks[[i]], (V(g_pos_networks[[i]])$Party == 'D'))
  pos_assort_r[[i]] <- assortativity_nominal(g_pos_networks[[i]], (V(g_pos_networks[[i]])$Party == 'R')+1)
  pos_assort_d[[i]] <- assortativity_nominal(g_pos_networks[[i]], (V(g_pos_networks[[i]])$Party == 'D')+1)
}

for (i in seq_along(g_neg_networks)){
  neg_degree_r[[i]] <- degree(g_neg_networks[[i]], (V(g_neg_networks[[i]])$Party == 'R'))
  neg_degree_d[[i]] <- degree(g_neg_networks[[i]], (V(g_neg_networks[[i]])$Party == 'D'))
  neg_assort_r[[i]] <- assortativity_nominal(g_neg_networks[[i]], (V(g_neg_networks[[i]])$Party == 'R')+1)
  neg_assort_d[[i]] <- assortativity_nominal(g_neg_networks[[i]], (V(g_neg_networks[[i]])$Party == 'D')+1)
}
par(mfrow=c(1,2))
# Starting year of each session (93rd through 114th Congress)
Session_Starting_Year <- seq(1973, 2015, by = 2)

# Total degree per session. vapply() returns a plain numeric vector;
# the previous lapply() forms handed plot()/lines() a list, which only
# worked through implicit coercion.
senate_neg_degree <- vapply(g_neg_networks[23:44], function(g) sum(degree(g)), numeric(1))
senate_pos_degree <- vapply(g_pos_networks[23:44], function(g) sum(degree(g)), numeric(1))

plot(Session_Starting_Year, senate_neg_degree, col = "orange", type="l", lwd = 2.5, ylim=c(0,6000),
     main = 'Degree of Senate Over Time', ylab = 'Network Degree')
lines(Session_Starting_Year, senate_pos_degree, col = "black", lwd = 2.5)

legend(x = "topleft", legend = c('Negative', 'Positive'), pch = 19, col = c("orange", "black"), title = "Relationship Type")

house_neg_degree <- vapply(g_neg_networks[1:22], function(g) sum(degree(g)), numeric(1))
house_pos_degree <- vapply(g_pos_networks[1:22], function(g) sum(degree(g)), numeric(1))

plot(Session_Starting_Year, house_neg_degree, col = "orange", type="l", lwd = 2.5, ylim=c(10000,100000),
     main = 'Degree of House Over Time', ylab = 'Network Degree')
lines(Session_Starting_Year, house_pos_degree, col = "black", lwd = 2.5)

par(mfrow = c(2,2))

# Degree distributions per session with party-mean overlays (red = Republican,
# blue = Democrat). vapply(..., numeric(1)) gives lines() a numeric vector;
# the previous lapply() forms handed it a list.

# House: Positive and Negative Relationships
boxplot(lapply(g_pos_networks[1:22], degree), main = 'Positive Relationship Degree - House')
lines(1:22, vapply(pos_degree_r[1:22], mean, numeric(1)), col = "red", lwd = 2.5)
lines(1:22, vapply(pos_degree_d[1:22], mean, numeric(1)), col = "blue", lwd = 2.5)

boxplot(lapply(g_neg_networks[1:22], degree), main = 'Negative Relationship Degree - House')
lines(1:22, vapply(neg_degree_r[1:22], mean, numeric(1)), col = "red", lwd = 2.5)
lines(1:22, vapply(neg_degree_d[1:22], mean, numeric(1)), col = "blue", lwd = 2.5)

# Senate: Positive and Negative Relationships
boxplot(lapply(g_pos_networks[23:44], degree), main = 'Positive Relationship Degree - Senate')
lines(1:22, vapply(pos_degree_r[23:44], mean, numeric(1)), col = "red", lwd = 2.5)
lines(1:22, vapply(pos_degree_d[23:44], mean, numeric(1)), col = "blue", lwd = 2.5)

boxplot(lapply(g_neg_networks[23:44], degree), main = 'Negative Relationship Degree - Senate')
lines(1:22, vapply(neg_degree_r[23:44], mean, numeric(1)), col = "red", lwd = 2.5)
lines(1:22, vapply(neg_degree_d[23:44], mean, numeric(1)), col = "blue", lwd = 2.5)

LOOKING AT TRENDS IN ASSORTATIVITY

par(mfrow = c(2,2))

# Assortativity by party over time: red = Republican indicator,
# blue = Democrat indicator.

# Plot Positive Relationships by party for the House
plot(Session_Starting_Year, unlist(pos_assort_r[1:22]), type="l",col="red", ylab = 'Assortativity Coefficient', main = 'Positive Relationship Assortativity by Party Over Time in the House')
lines(Session_Starting_Year, unlist(pos_assort_d[1:22]), col = "blue")

# Plot Positive Relationships by party for the Senate
plot(Session_Starting_Year, unlist(pos_assort_r[23:44]),type="l",col="red", ylab = 'Assortativity Coefficient', main = 'Positive Relationship Assortativity by Party Over Time in the Senate')
lines(Session_Starting_Year, unlist(pos_assort_d[23:44]), col = "blue")

# Plot Negative Relationships by party for the House
plot(Session_Starting_Year, unlist(neg_assort_r[1:22]),type="l",col="red", ylab = 'Assortativity Coefficient', main = 'Negative Relationship Assortativity by Party Over Time in the House')
lines(Session_Starting_Year, unlist(neg_assort_d[1:22]), col = "blue")

# Plot Negative Relationships by party for the Senate
plot(Session_Starting_Year, unlist(neg_assort_r[23:44]),type="l",col="red", ylab = 'Assortativity Coefficient', main = 'Negative Relationship Assortativity by Party Over Time in the Senate')
lines(Session_Starting_Year, unlist(neg_assort_d[23:44]), col = "blue")

par(mfrow = c(1,2))

# Senate-only versions plotted against session number (93-114).
# rep(93:114) was identical to 93:114, so the redundant rep() wrapper is gone.

# Plot Positive Relationships by party for the Senate
plot(93:114, unlist(pos_assort_r[23:44]),type="l",col="red",
     ylab = 'Assortativity Coefficient',
     main = 'Positive Relationship Assortativity by Party Over Time in the Senate',
     xlab = 'Congressional Session')
lines(93:114, unlist(pos_assort_d[23:44]), col = "blue")

legend(x = "topleft", legend = c('Republican', 'Democrat'),
       pch = 19,
       col = c("red", "blue"),
       title = "Political Party")

# Plot Negative Relationships by party for the Senate
plot(93:114, unlist(neg_assort_r[23:44]),type="l",col="red",
     ylab = 'Assortativity Coefficient',
     main = 'Negative Relationship Assortativity by Party Over Time in the Senate',
     xlab = 'Congressional Session')
lines(93:114, unlist(neg_assort_d[23:44]), col = "blue")

#Longitudinal Modeling ——

# Community Detection: fast-greedy modularity clustering on every session's
# positive and negative networks.
blocks_pos <- lapply(g_pos_networks, cluster_fast_greedy)
blocks_neg <- lapply(g_neg_networks, cluster_fast_greedy)
# Degree Corrected Stochastic Block Model using randnet:
# https://cran.r-project.org/web/packages/randnet/randnet.pdf

# DCSBM.estimate() takes an adjacency matrix plus a membership vector --
# here the communities found above by cluster_fast_greedy().
g_pos_adj_matrices <- lapply(g_pos_networks, as_adjacency_matrix)
g_neg_adj_matrices <- lapply(g_neg_networks, as_adjacency_matrix)

# Fit one DCSBM per session for each sign of relationship.
dcsbm_pos_networks <- Map(function(adj, blocks) DCSBM.estimate(adj, blocks$membership),
                          g_pos_adj_matrices, blocks_pos)
dcsbm_neg_networks <- Map(function(adj, blocks) DCSBM.estimate(adj, blocks$membership),
                          g_neg_adj_matrices, blocks_neg)
plot(blocks_pos[[34]], g_pos_networks[[34]], vertex.label = vertex_attr(g_pos_networks[[34]])$Party, main = 'Positive Relationship - Clusters')

Remove Isolated Nodes for Easier-to-Interpret Community Detection Results

# If we want to remove the isolated nodes so they are not each their own
# community, one way is to manually remove them.

# Identify the isolated (degree-zero) vertices in every session.
Pos_Isolateds <- lapply(g_pos_networks, function(g) which(degree(g) == 0))
Neg_Isolateds <- lapply(g_neg_networks, function(g) which(degree(g) == 0))

# Drop the isolates to obtain a connected version of every network.
g_pos_networks_connected <- Map(igraph::delete.vertices, g_pos_networks, Pos_Isolateds)
g_neg_networks_connected <- Map(igraph::delete.vertices, g_neg_networks, Neg_Isolateds)

# Re-run fast-greedy community detection on the isolate-free graphs.
connected_blocks_pos <- lapply(g_pos_networks_connected, cluster_fast_greedy)
connected_blocks_neg <- lapply(g_neg_networks_connected, cluster_fast_greedy)
plot(connected_blocks_pos[[34]], g_pos_networks_connected[[34]], vertex.label = vertex_attr(g_pos_networks_connected[[34]])$Party, main = 'Positive Relationship - Clusters')

DEGREE-CORRECTED SBM USING DETECTED COMMUNITIES

# Degree Corrected Stochastic Block Model on the connected networks (randnet):
# https://cran.r-project.org/web/packages/randnet/randnet.pdf

# Adjacency matrices of the isolate-free graphs.
g_pos_connected_adj_matrices <- lapply(g_pos_networks_connected, as_adjacency_matrix)
g_neg_connected_adj_matrices <- lapply(g_neg_networks_connected, as_adjacency_matrix)

# One DCSBM per session, using the fast-greedy communities as the membership.
dcsbm_pos_connected_networks <- Map(function(adj, blocks) DCSBM.estimate(adj, blocks$membership),
                                    g_pos_connected_adj_matrices, connected_blocks_pos)
dcsbm_neg_connected_networks <- Map(function(adj, blocks) DCSBM.estimate(adj, blocks$membership),
                                    g_neg_connected_adj_matrices, connected_blocks_neg)
# Create copy of the connected networks and then reassign the Party attribute from D, R, I, or other to 1,2,3,4 so that we can pass this to the DCSBM.estimate() function
# NOTE(review): this block is repeated verbatim a little further down, just
# before the party-based DCSBM fit, so the results produced here are
# immediately overwritten -- consider deleting one of the two copies.
pos_party_labels <- g_pos_networks_connected
neg_party_labels <- g_neg_networks_connected


# Recode parties to numeric codes: D=1, R=2, I=3, anything else=4.
# (The stored values become character "1".."4" since Party is a character
# attribute; DCSBM.estimate appears to accept them -- confirm.)
for (i in 1:44){
  V(pos_party_labels[[i]])$Party[V(pos_party_labels[[i]])$Party == 'D'] <- 1
  V(pos_party_labels[[i]])$Party[V(pos_party_labels[[i]])$Party == 'R'] <- 2
  V(pos_party_labels[[i]])$Party[V(pos_party_labels[[i]])$Party == 'I'] <- 3
  V(pos_party_labels[[i]])$Party[!(V(pos_party_labels[[i]])$Party %in% c(1, 2, 3))] <- 4
  V(neg_party_labels[[i]])$Party[V(neg_party_labels[[i]])$Party == 'D'] <- 1
  V(neg_party_labels[[i]])$Party[V(neg_party_labels[[i]])$Party == 'R'] <- 2
  V(neg_party_labels[[i]])$Party[V(neg_party_labels[[i]])$Party == 'I'] <- 3
  V(neg_party_labels[[i]])$Party[!(V(neg_party_labels[[i]])$Party %in% c(1, 2, 3))] <- 4
  
}

DEGREE-CORRECTED SBM USING POLITICAL PARTY AS COMMUNITIES

# Degree Corrected Stochastic Block Model on connected networks using Party as the community using randnet: https://cran.r-project.org/web/packages/randnet/randnet.pdf

# Create copy of the connected networks, then recode the Party attribute from
# D, R, I, or other to 1,2,3,4: DCSBM.estimate() needs numeric block labels.
pos_party_labels <- g_pos_networks_connected
neg_party_labels <- g_neg_networks_connected


# seq_along() replaces the hard-coded 1:44 (one entry per session in each list)
for (i in seq_along(pos_party_labels)){
  V(pos_party_labels[[i]])$Party[V(pos_party_labels[[i]])$Party == 'D'] <- 1
  V(pos_party_labels[[i]])$Party[V(pos_party_labels[[i]])$Party == 'R'] <- 2
  V(pos_party_labels[[i]])$Party[V(pos_party_labels[[i]])$Party == 'I'] <- 3
  V(pos_party_labels[[i]])$Party[!(V(pos_party_labels[[i]])$Party %in% c(1, 2, 3))] <- 4
  V(neg_party_labels[[i]])$Party[V(neg_party_labels[[i]])$Party == 'D'] <- 1
  V(neg_party_labels[[i]])$Party[V(neg_party_labels[[i]])$Party == 'R'] <- 2
  V(neg_party_labels[[i]])$Party[V(neg_party_labels[[i]])$Party == 'I'] <- 3
  V(neg_party_labels[[i]])$Party[!(V(neg_party_labels[[i]])$Party %in% c(1, 2, 3))] <- 4
}

# Preallocated lists to store the degree-corrected stochastic block models
dcsbm_party_pos_connected_networks <- vector("list", length(g_pos_connected_adj_matrices))
dcsbm_party_neg_connected_networks <- vector("list", length(g_neg_connected_adj_matrices))

# Pass in the adjacency matrix and the membership determined by party
# (numeric labels instead of 'D'/'R' because DCSBM.estimate needs numbers)
for (i in seq_along(g_pos_connected_adj_matrices)){
  dcsbm_party_pos_connected_networks[[i]] <- DCSBM.estimate(g_pos_connected_adj_matrices[[i]], V(pos_party_labels[[i]])$Party)
}

for (i in seq_along(g_neg_connected_adj_matrices)){
  dcsbm_party_neg_connected_networks[[i]] <- DCSBM.estimate(g_neg_connected_adj_matrices[[i]], V(neg_party_labels[[i]])$Party)
}
# Convert log-odds (logits) to probabilities so the DCSBM output is easier
# to read.
#
# Uses 1 / (1 + exp(-logit)) instead of exp(logit) / (1 + exp(logit)):
# the latter overflows to Inf/Inf = NaN for large logits (roughly > 709),
# which is exactly the NaN problem the downstream replace-with-1 loop was
# working around. This form returns 1 for large positive inputs and 0 for
# large negative inputs, with identical values everywhere else.
logit2prob <- function(logit){
  1 / (1 + exp(-logit))
}
# Store the per-session block-probability matrices from the DCSBM fits.
# logit2prob() can return NaN when a log-likelihood is very large (exp
# overflow, roughly > 800), so NaN entries are replaced with 1 before
# plotting rather than printing the matrices directly.

b_prob_mat_pos <- vector("list", length(dcsbm_pos_connected_networks))
b_prob_mat_neg <- vector("list", length(dcsbm_neg_connected_networks))
b_prob_party_mat_pos <- vector("list", length(dcsbm_party_pos_connected_networks))
b_prob_party_mat_neg <- vector("list", length(dcsbm_party_neg_connected_networks))

for (i in seq_along(dcsbm_pos_connected_networks)){
  b_prob_mat_pos[[i]] <- logit2prob(dcsbm_pos_connected_networks[[i]]$B)
  # BUG FIX: the two *_neg matrices previously reused the *positive* network
  # models (copy-paste error); they now come from the negative-network fits.
  b_prob_mat_neg[[i]] <- logit2prob(dcsbm_neg_connected_networks[[i]]$B)
  b_prob_party_mat_pos[[i]] <- logit2prob(dcsbm_party_pos_connected_networks[[i]]$B)
  b_prob_party_mat_neg[[i]] <- logit2prob(dcsbm_party_neg_connected_networks[[i]]$B)
}

for (i in seq_along(b_prob_mat_pos)){
  # is.nan() is the correct test; comparing against the string 'NaN' only
  # worked through implicit numeric-to-character coercion.
  b_prob_mat_pos[[i]][is.nan(b_prob_mat_pos[[i]])] <- 1
  b_prob_mat_neg[[i]][is.nan(b_prob_mat_neg[[i]])] <- 1
  b_prob_party_mat_pos[[i]][is.nan(b_prob_party_mat_pos[[i]])] <- 1
  b_prob_party_mat_neg[[i]][is.nan(b_prob_party_mat_neg[[i]])] <- 1
}
par(mfrow = c(4,4))
# Heatmaps of the party-block connection-probability matrices for the first
# 16 sessions. NOTE(review): image.plot() comes from the 'fields' package,
# which is not among the library() calls visible here -- presumably loaded
# elsewhere in the setup; confirm.
for (i in 1:16){
  #image.plot(logit2prob(dcsbm_pos_connected_networks[[i]]$B))
  image.plot(b_prob_party_mat_pos[[i]])
}

COUNT OF COLUMNS IN THE TABLES OF POSITIVE CONNECTIONS THAT SHOW A CONTINGENCY TABLE OF PARTY AND DETECTED COMMUNITY THAT HAVE MORE THAN 1 NON-ZERO VALUE TO SHOW COMMUNITIES THAT INCLUDE MEMBERS OF MORE THAN 1 POLITICAL PARTY

# Party-by-community contingency table for the 104th-session (index 34)
# Senate positive network: rows are parties, columns are detected communities.
table(vertex_attr(g_pos_networks_connected[[34]])$Party, connected_blocks_pos[[34]]$membership)
##    
##      1  2  3  4
##   D 41  1  0  1
##   I  2  0  0  0
##   R  4 43  2  1
# A community (column) with more than one non-zero party count contains
# members of multiple parties.
colSums(table(vertex_attr(g_pos_networks_connected[[34]])$Party, connected_blocks_pos[[34]]$membership) != 0) > 1
##     1     2     3     4 
##  TRUE  TRUE FALSE  TRUE

FALSE = 1 party, TRUE = more than 1 party

# Members of the small mixed community (#4) in that session
vertex_attr(g_pos_networks[[34]])$name[connected_blocks_pos[[34]]$membership == 4]
## [1] "Biden, J. (DE-D)"    "Daschle, T. (SD-D)"  "Pressler, L. (SD-R)"

CREATE THE PARTY-COMMUNITY TABLES FOR EVERY SESSION

# One party-by-community contingency table per session (positive networks).
party_community_tables <- lapply(
  seq_along(g_pos_networks_connected),
  function(k) {
    table(vertex_attr(g_pos_networks_connected[[k]])$Party,
          connected_blocks_pos[[k]]$membership)
  }
)

CREATE A FUNCTION TO CALCULATE THE COLUMNSUMS AND THEN GET THE PROPORTION OF COMMUNITIES WITH MORE THAN 1 PARTY COMPARED TO TOTAL NUMBER OF COMMUNITIES

# Given a party-by-community contingency table, return the proportion of
# communities (columns) whose membership spans more than one party (rows).
calculate_crossparty_proportion <- function(tables){
  parties_per_community <- colSums(tables != 0)
  sum(parties_per_community > 1) / length(parties_per_community)
}

PLOT THE PROPORTION OF COMMUNITIES WITH MULTIPLE PARTIES BY SENATE SESSION

#par(mfrow = c(1,2))

# Proportion of multi-party communities per session: Senate (orange, list
# indices 23-44) with the House (dodgerblue, indices 1-22) overlaid on the
# same year axis.
plot(Session_Starting_Year, unlist(lapply(party_community_tables, FUN = calculate_crossparty_proportion), use.names = FALSE)[23:44], 
     ylab = 'Proportion of Communities with Multi-Party Membership', 
     main = 'Proportion of Communities that Contain More than 1 Political Party', type = 'l', col = 'orange', lwd = 2, ylim = c(.15, 1.15))
lines(Session_Starting_Year, unlist(lapply(party_community_tables, FUN = calculate_crossparty_proportion), use.names = FALSE)[1:22], col = 'dodgerblue', lwd = 2)
legend(x = "topleft", legend = c('Senate', 'House'), pch = 19, col = c("orange", "dodgerblue"), title = "Congressional Body")

SELECT THE TWO LARGEST COMMUNITIES FOR EACH SESSION, THEN SHOW % OF R & % OF D

# Takes a contingency table (party rows x community columns), keeps the two
# largest communities, renames them 1 and 2, and returns a dataframe of
# D/R counts per community tagged with each community's dominant party.
calculate_largest_communities <- function(tables){
  # Sort communities by share of total membership and keep the top two.
  # desc() negates, so an ascending sort() of the negated sums is a
  # descending sort of the originals; names() recovers community labels.
  # NOTE(review): which community wins a tie depends on sort order here.
  tables <- tables[, names(sort(desc(colSums(prop.table(tables)))))[1:2]]
  colnames(tables) <- c(1,2)
  
  # Extract the rowname (political party) with the highest value in column 1 - that is the dominant political party for that community (column) 
  mainparty_community1 <- names(sort(desc(tables[,1])))[1]
  # Extract the rowname (political party) with the highest value in column 2 - that is the dominant political party for that community (column) 
  mainparty_community2 <- names(sort(desc(tables[,2])))[1]

  # Turn table into long-form df: Var1 = party, Var2 = community, Freq = count
  table_dfs <- as.data.frame(tables)
  
  # Remove non Democrat and non Republican members
  table_dfs <- table_dfs %>% subset(Var1 %in% c('D', 'R'))
  table_dfs$Var1 <- factor(table_dfs$Var1)
  
  # Calculate a proportion from the Frequency counts.
  # NOTE(review): the denominator is the D+R total across *both* communities,
  # not per community; Percentage is recomputed per community downstream, so
  # Prop may be vestigial -- confirm before relying on it.
  table_dfs <- table_dfs %>% mutate(Prop = round(Freq/sum(Freq), 3))
  # Tag each row with the dominant party of its community
  table_dfs$MainPartyCommunity <- '.'
  table_dfs$MainPartyCommunity[table_dfs$Var2 == 1] <- mainparty_community1
  table_dfs$MainPartyCommunity[table_dfs$Var2 == 2] <- mainparty_community2
  return(table_dfs)
}

# Per-session D/R breakdowns of the two largest detected communities.
d_r_prop_tables <- lapply(party_community_tables, calculate_largest_communities)

# Stack everything into one dataframe; .id = 'Session' records which list
# element (session) each row came from.
party_community_df <- bind_rows(d_r_prop_tables, .id = 'Session')

Number of nodes that fit in the two largest communities per session. We can see that in the earlier sessions of the senate, only about 70% are included in the two largest communities, while later on almost every single node is included in the two largest communities. Counts per session (the first 22 values are House sessions, the remaining 22 are Senate sessions): 387 391 421 406 406 414 412 422 417 382 418 419 422 434 436 430 433 442 419 435 437 435 72 73 73 67 63 61 71 71 81 75 73 91 94 97 92 87 85 97 101 93 102 97

# Get the maximum Freq within each Session-Community group.
party_community_df <- party_community_df %>% group_by(Session, Var2) %>% mutate(dom = max(Freq))

# Label each row with the dominant party of its Session-Community group: the
# row holding the group max gets its party letter, the other row gets
# 'ignore'. This lets communities be identified as R- or D-dominated rather
# than by the arbitrary labels 1 and 2.
party_community_df <- party_community_df %>% group_by(Session, Var2) %>% mutate(mainparty = ifelse(dom == Freq, as.character(Var1), 'ignore'))
# Within-community share of each party
party_community_df <- party_community_df %>% group_by(Session, Var2) %>% mutate(Percentage = Freq/sum(Freq))

# Change Factors to Character, Character to Numeric for area plotting
party_community_df$Session <- as.numeric(party_community_df$Session)
party_community_df$Var1 <- as.character(party_community_df$Var1)

# Ggplot was having issues with the tibble, so converting to dataframe
party_community_df <- data.frame(party_community_df)

# Changing Variable Names and Values for interpretation purposes
names(party_community_df)[names(party_community_df) == "Var1"] <- "Political Party"
party_community_df$`Political Party`[party_community_df$`Political Party` == 'D'] <- 'Democrat'
party_community_df$`Political Party`[party_community_df$`Political Party` == 'R'] <- 'Republican'

# Remove any sessions in which one party dominated both communities.
# unique(mainparty) per session is {partyA, partyB, 'ignore'} (length 3) when
# the two communities have different dominant parties, but only length 2 when
# one party dominates both -- hence the `> 2` filter below.
party_community_df <- party_community_df %>% group_by(Session) %>% mutate('1partydom' = length(unique(mainparty)))
party_community_df <- party_community_df %>% group_by(Session) %>% subset(`1partydom` > 2)
# Show Percentage of Ds and Rs in the House for the D-dominated Community and R-dominated Community Over time. Ignoring session 2 because both communities were D-dominated
#ggplot(party_community_df[party_community_df$Session %in% c(1, 3:22), ], aes(x = Session, y = Percentage, fill = `Political Party`)) +

# House sessions only (indices 1-22); used below to build the axis labels
seshs <- party_community_df[party_community_df$Session <= 22, ]$Session

ggplot(party_community_df[party_community_df$Session <= 22, ], aes(x = Session, y = Percentage, fill = `Political Party`)) +
    
  geom_area(alpha=0.85, size = 1) + 
  
  # One panel per dominant party of the community
  facet_wrap(~MainPartyCommunity, 
             labeller = as_labeller(c('D' = 'Democrat-Dominated Community', 'R' = 'Republican-Dominated Community'))) + 
  
  scale_fill_manual(values = c("blue", "red")) + 
  
  ggtitle('Trends in Polarization in the U.S. House of Representatives\nEvolution of the Distribution of Political Party Membership in Detected Communities') +
  
  ylab("Percentage of Total Community Membership by Party") +
  
  xlab("Congressional Session") +
  
  # NOTE(review): theme_economist()/scale_color_economist() come from the
  # 'ggthemes' package, not attached in the visible library() calls -- confirm.
  theme_economist() +
  
  scale_color_economist() +

  # Relabel the session indices with the real session numbers (93-114).
  # NOTE(review): this assumes the sessions surviving the 1partydom filter
  # map one-to-one onto the breaks -- confirm when sessions are dropped.
  scale_x_continuous(breaks=rep(1:length(unique(seshs))),
        labels = rep(93:114)[unique(seshs)]) +
  
  theme(axis.title.y = element_text(size = 16, angle = 90), 
        axis.text.y = element_text(size = 13, margin = margin(r = 0)),
        axis.title.x = element_text(size = 16),
        axis.text.x = element_text(size = 13, angle = 45, hjust = -.05),
        legend.title = element_text(size = 16),
        legend.box.margin = margin(6),
        legend.background = element_rect(fill = "lightgray"),
        plot.title = element_text(hjust = 0.5),
        panel.grid.major = element_blank(),
        panel.spacing = unit(0, "mm"))

# Show Percentage of Ds and Rs in the Senate for the D-dominated Community and R-dominated Community Over time. Ignoring session 26 because both communities were D-dominated

seshs <- party_community_df[party_community_df$Session >= 23, ]$Session

# used later to get the unique senate sessions that were included in the plot
# (shifts list indices 23-44 down to Senate session indices 1-22)
seshs <- seshs-22

# NOTE(review): aes(x = seshs) maps a vector from the calling environment
# rather than a column of the data argument -- this relies on the filtered
# data and seshs staying aligned row-for-row; fragile, confirm intended.
ggplot(party_community_df[party_community_df$Session >= 23, ], aes(x = seshs, y = Percentage, fill = `Political Party`)) + 
  
  # NOTE(review): facet_wrap() is called twice; the second call (with the
  # labeller) overrides this first one, which is redundant.
  geom_area(alpha = .85, size = 1) + facet_wrap(~MainPartyCommunity) + 
    
  facet_wrap(~MainPartyCommunity, 
             labeller = as_labeller(c('D' = 'Community One (Democrat-Dominated)', 'R' = 'Community Two (Republican-Dominated)'))) + 
  
  scale_fill_manual(values = c("blue", "red")) + 
  
  ggtitle('Polarization in the U.S. Senate\nTrends in Political Party Membership in Two Detected Communities') +
  
  ylab("Percentage of Total Community Membership by Party") +
  
  xlab("Senate Session") +
  
  theme_economist() +
  
  scale_color_economist() +
  
  # Relabel the Senate session indices with the real session numbers (93-114)
  scale_x_continuous(breaks=rep(1:length(unique(seshs))),
        labels = rep(93:114)[unique(seshs)]) +
  
  theme(axis.title.y = element_text(size = 16, angle = 90), 
        axis.text.y = element_text(size = 13, margin = margin(r = 0)),
        axis.title.x = element_text(size = 16),
        axis.text.x = element_text(size = 13, angle = 45, hjust = -.05),
        legend.title = element_text(size = 16),
        legend.box.margin = margin(6),
        legend.background = element_rect(fill = "lightgray"),
        plot.title = element_text(hjust = 0.5),
        panel.grid.major = element_blank(),
        panel.spacing = unit(0, "mm"))

LINEAR REGRESSION: REGRESSING NUMBER OF COMMUNITIES DETECTED ON SESSION

# Calculate the number of clusters (communities) for positive connections on
# the connected graphs. vapply() replaces the original grow-in-a-loop pattern
# (c() + `1:length()` + trailing unlist()) and yields an atomic vector directly.
clusters <- vapply(connected_blocks_pos,
                   function(cb) length(communities(cb)),
                   numeric(1))

# Calculate the number of vertices in each graph so we can distinguish
# Congress from Senate (the House networks have far more nodes than the
# Senate's 100 -- TODO confirm exact counts against the data)
vertices <- vapply(g_pos_networks_connected,
                   function(g) length(V(g)),
                   numeric(1))

# `clusters` is already an atomic vector here, ready for plotting; no
# unlist() step is needed.

NOTE: Since the number of communities detected is a count outcome, a Poisson regression would be more appropriate than OLS; the linear fits below should be read as a first approximation.

# Plot the number of communities/clusters for each senate (higher X is more recent Senate)
# Senate networks occupy positions 23-44 of `clusters`.
session_idx <- seq_len(22)

senate_fit <- lm(clusters[23:44] ~ session_idx)

# xaxt = "n" suppresses the default x-axis so the custom 93-114 session
# labels drawn by axis() below do not overprint the default 1-22 labels.
plot(session_idx, clusters[23:44], xaxt = "n",
     xlab = 'Senate Session', ylab = 'Number of Communities Detected',
     main = 'Regressing Number of Communities Detected on Senate Session\nHigher Value of Session Implies More Recent')

axis(side = 1, at = session_idx, labels = 93:114)

abline(senate_fit, col = "red")

# Plot the number of communities/clusters for each congress (higher X is more recent congress)
# House networks occupy positions 1-22. The original regressor was
# rep(1:22): rep() with a single argument is a no-op, so seq_len(22) is used.
congress_fit <- lm(clusters[1:22] ~ session_idx)

plot(session_idx, clusters[1:22], xaxt = "n",
     xlab = 'Congressional Session', ylab = 'Number of Communities Detected',
     main = 'Regressing Number of Communities Detected on Congressional Session\nHigher Value of Session Implies More Recent')

axis(side = 1, at = session_idx, labels = 93:114)

abline(congress_fit, col = "red")

summary(senate_fit)
## 
## Call:
## lm(formula = clusters[23:44] ~ c(1:22))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.37324 -0.89328 -0.09091  0.71556  1.41728 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.38961    0.42478  12.688 5.05e-11 ***
## c(1:22)     -0.11293    0.03234  -3.492   0.0023 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9624 on 20 degrees of freedom
## Multiple R-squared:  0.3787, Adjusted R-squared:  0.3477 
## F-statistic: 12.19 on 1 and 20 DF,  p-value: 0.002299

For each new Senate Session, the number of communities detected decreases by .11, significant at the .01 level. Fewer communities could be a proxy for polarization, in which communities that existed outside the bounds of pure party affiliation are dissolved and are drawn into one of the two parties.

summary(congress_fit)
## 
## Call:
## lm(formula = clusters[1:22] ~ rep(1:22))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.26200 -0.47741  0.03162  0.46513  1.29757 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   5.1429     0.3178  16.182 5.89e-13 ***
## rep(1:22)    -0.1468     0.0242  -6.067 6.25e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.72 on 20 degrees of freedom
## Multiple R-squared:  0.648,  Adjusted R-squared:  0.6304 
## F-statistic: 36.81 on 1 and 20 DF,  p-value: 6.252e-06

For each new Congress Session, the number of communities detected decreases by .14, significant at the .001 level. Fewer communities could be a proxy for polarization, in which communities that existed outside the bounds of pure party affiliation are dissolved and are drawn into one of the two parties.

#Longitudinal Modeling Part 3 ——

ERGM Modeling - 93 - 114th

# ERGM models for the longitudial study
# Do this and report the EXPIT for the summaries of each of edges v party
# General Positive ####

# Fit an edges + party-homophily ERGM to each Senate positive-tie network
# (networks 23-44, i.e. the 93rd-114th sessions). Results collect into ML1.
senate_pos <- g_pos_networks[23:44]
ML1 <- vector("list", length(senate_pos))
for (X in seq_along(senate_pos)) {
  gr <- senate_pos[[X]]
  # Rebuild the igraph object as a 'network' object and copy the vertex
  # attributes across so ERGM terms can reference them.
  g_p_net.ergm <- network(as.matrix(as_adjacency_matrix(gr)))
  set.vertex.attribute(g_p_net.ergm, "Party", vertex.attributes(gr)$Party)
  set.vertex.attribute(g_p_net.ergm, "State", vertex.attributes(gr)$State)
  set.vertex.attribute(g_p_net.ergm, "Name", vertex.attributes(gr)$name)
  ML1[[X]] <- ergm(
    g_p_net.ergm ~ edges + match("Party"),
    control = control.ergm(MCMC.burnin = 5000,
                           MCMC.interval = 10,
                           MCMLE.maxit = 10)
  )
}
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
# Convert the ML1 ERGM coefficients to probabilities via the inverse logit
# (expit), per the note above ("report the EXPIT"):
#   - ML1.Edges: expit(edges) -- baseline tie probability for dyads that do
#     NOT match on Party
#   - ML1.Edges.Party: expit(edges + nodematch.Party) -- tie probability for
#     same-party dyads
# vapply() replaces the original grow-in-a-loop pattern with preallocation.
ML1.Edges <- vapply(ML1,
                    function(fit) unname(expit(fit$coef[1])),
                    numeric(1))
ML1.Edges.Party <- vapply(ML1,
                          function(fit) unname(expit(fit$coef[1] + fit$coef[2])),
                          numeric(1))

plot(seq_along(ML1.Edges.Party), ML1.Edges.Party) # Include Graphic

plot(seq_along(ML1.Edges), ML1.Edges)

# Positive-tie ERGMs with both party- and state-homophily terms, one per
# Senate session (93rd-114th). Results collect into ML1.1.
pos_senate_nets <- g_pos_networks[23:44]
ML1.1 <- vector("list", length(pos_senate_nets))
for (X in seq_along(pos_senate_nets)) {
  gr <- pos_senate_nets[[X]]
  # Rebuild as a 'network' object and carry the vertex attributes across.
  g_p_net.ergm <- network(as.matrix(as_adjacency_matrix(gr)))
  set.vertex.attribute(g_p_net.ergm, "Party", vertex.attributes(gr)$Party)
  set.vertex.attribute(g_p_net.ergm, "State", vertex.attributes(gr)$State)
  set.vertex.attribute(g_p_net.ergm, "Name", vertex.attributes(gr)$name)
  ML1.1[[X]] <- ergm(
    g_p_net.ergm ~ edges + match("Party") + match("State"),
    control = control.ergm(MCMC.burnin = 5000,
                           MCMC.interval = 10,
                           MCMLE.maxit = 10)
  )
}
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
# General Negative ####
# Edges + party-homophily ERGMs on the NEGATIVE-tie Senate networks
# (networks 23-44). Results collect into ML2.
senate_neg <- g_neg_networks[23:44]
ML2 <- vector("list", length(senate_neg))
for (X in seq_along(senate_neg)) {
  gr <- senate_neg[[X]]
  # Rebuild the igraph object as a 'network' object and copy the vertex
  # attributes across so ERGM terms can reference them.
  g_p_net.ergm <- network(as.matrix(as_adjacency_matrix(gr)))
  set.vertex.attribute(g_p_net.ergm, "Party", vertex.attributes(gr)$Party)
  set.vertex.attribute(g_p_net.ergm, "State", vertex.attributes(gr)$State)
  set.vertex.attribute(g_p_net.ergm, "Name", vertex.attributes(gr)$name)
  ML2[[X]] <- ergm(
    g_p_net.ergm ~ edges + match("Party"),
    control = control.ergm(MCMC.burnin = 5000,
                           MCMC.interval = 10,
                           MCMLE.maxit = 10)
  )
}
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
# Inverse-logit (expit) transform of the ML2 (negative-graph) ERGM
# coefficients:
#   - ML2.Edges: expit(edges) -- tie probability for cross-party dyads
#   - ML2.Edges.Party: expit(edges + nodematch.Party) -- tie probability for
#     same-party dyads
# vapply() replaces the original grow-in-a-loop pattern with preallocation.
ML2.Edges <- vapply(ML2,
                    function(fit) unname(expit(fit$coef[1])),
                    numeric(1))
ML2.Edges.Party <- vapply(ML2,
                          function(fit) unname(expit(fit$coef[1] + fit$coef[2])),
                          numeric(1))

plot(seq_along(ML2.Edges.Party), ML2.Edges.Party) # Include Graphic

plot(seq_along(ML2.Edges), ML2.Edges)

# Negative-tie ERGMs with both party- and state-homophily terms, one per
# Senate session (93rd-114th). Results collect into ML2.1.
neg_senate_nets <- g_neg_networks[23:44]
ML2.1 <- vector("list", length(neg_senate_nets))
for (X in seq_along(neg_senate_nets)) {
  gr <- neg_senate_nets[[X]]
  # Rebuild as a 'network' object and carry the vertex attributes across.
  g_p_net.ergm <- network(as.matrix(as_adjacency_matrix(gr)))
  set.vertex.attribute(g_p_net.ergm, "Party", vertex.attributes(gr)$Party)
  set.vertex.attribute(g_p_net.ergm, "State", vertex.attributes(gr)$State)
  set.vertex.attribute(g_p_net.ergm, "Name", vertex.attributes(gr)$name)
  ML2.1[[X]] <- ergm(
    g_p_net.ergm ~ edges + match("Party") + match("State"),
    control = control.ergm(MCMC.burnin = 5000,
                           MCMC.interval = 10,
                           MCMLE.maxit = 10)
  )
}
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Observed statistic(s) nodematch.State are at their smallest attainable values. Their coefficients will be fixed at -Inf.
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Observed statistic(s) nodematch.State are at their smallest attainable values. Their coefficients will be fixed at -Inf.
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Observed statistic(s) nodematch.State are at their smallest attainable values. Their coefficients will be fixed at -Inf.
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate. 
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
# Side-by-side panels of the ERGM-derived same-party tie probabilities for
# the positive and negative graphs, indexed by Senate session (93-114).
par(mfrow = c(2, 2))
plot(c(93:114), ML1.Edges.Party, main = "Estimated Probability of Connecting", sub = "Positive Graph", xlab = "Senate Sessions", ylab = "Probability") # Include Graphic
plot(c(93:114), ML2.Edges.Party, main = "Estimated Probability of Connecting", sub = "Negative Graph", xlab = "Senate Sessions", ylab = "Probability") # Include Graphic
# Restore the default single-panel layout. (The original reset to c(1, 2),
# which would leave later base-graphics plots in a stale two-panel layout.)
par(mfrow = c(1, 1))

#Validation - Comparing Community Detection Algorithms ——

=============== Comparison of Community Detection Algorithms ===============

  1. Count the number of communities detected by each algo, compare visually

  2. Use the Compare() function to compare each algorithm

  1. Will need to look across comparisons (algo1 ~ algo2) vs (algo1 ~ algo3)
  2. Need to decide on the method of comparison (vi, nmi, splitjoin, etc.)
  3. Use the method with the least amount of variation in comparison scores ai. Look at the mean variance in the correlation matrices of comparison scores and select method with lowest mean variance in correlation matrix
  4. Once the method is determined, compute the mean comparison score (sum across each network) using the determined method
  5. For each algorithm, add up the mean comparison scores for every comparison it is involved in
  6. Select the algorithm with the highest value of mean comparison scores
# Comparing Community Detection Algorithms. Might be helpful: https://www.nature.com/articles/srep30750

# Seven different community-detection algorithms applied to the positive
# network of session 34; cluster_spinglass (cs) stays commented out because
# it doesn't work for an unconnected network.
net34 <- g_pos_networks[[34]]
cfg <- cluster_fast_greedy(net34)
cim <- cluster_infomap(net34)
cl <- cluster_louvain(net34)
cwt <- cluster_walktrap(net34)
#cs <- cluster_spinglass(net34)
cle <- cluster_leading_eigen(net34)
ceb <- cluster_edge_betweenness(net34)

# Comparing the results
# compare() scores the distance between two community partitions; with the
# default method, lower values indicate more similar clusterings
# (NOTE(review): default method not specified here -- confirm against the
# igraph::compare documentation before interpreting magnitudes).
compare(cfg, cim)
## [1] 0.7916856
compare(cfg, cl)
## [1] 0.2832559
compare(cim, cl)
## [1] 0.945693
# Comparing Community Detection Algorithms. Might be helpful: https://www.nature.com/articles/srep30750

# Seven Different community Detection Algorithms, cluster_spinglass doesn't work for unconnected network

# Loop through all positive connected networks and detect communities
# Each of cfgs/cims/cls/cwts is a list of 'communities' objects, one per
# session network (1-22 House, 23-44 Senate -- indexing used in the plots below).
cfgs <- lapply(g_pos_networks_connected, cluster_fast_greedy)
cims <- lapply(g_pos_networks_connected, cluster_infomap)
cls <- lapply(g_pos_networks_connected, cluster_louvain)
cwts <- lapply(g_pos_networks_connected, cluster_walktrap)

# Long run times
#css <- lapply(lapply(g_pos_networks_connected, cluster_spinglass), membership)
#cles <- lapply(lapply(g_pos_networks_connected, cluster_leading_eigen), membership)
#cebs <- lapply(lapply(g_pos_networks_connected, cluster_edge_betweenness), membership)
# Compare number of communities in each clustering algorithm

# Number of distinct communities in one clustering result
n_communities <- function(clustering) length(unique(membership(clustering)))

par(mfrow = c(2,1))

# House networks: elements 1-22
plot(Session_Starting_Year, vapply(cfgs[1:22], n_communities, numeric(1)), type = "l", col = "red", ylim = c(1,16), 
     main = 'Number of Unique Communities Detected by Various Clustering Algorithms for the Positive Relationship Networks of the U.S. House of Representatives',
     ylab = 'Number of Unique Communities',
     xlab = 'Networks\nEach Network Corresponds to Congressional Session')
lines(Session_Starting_Year, vapply(cims[1:22], n_communities, numeric(1)), col = "dodgerblue")
lines(Session_Starting_Year, vapply(cls[1:22], n_communities, numeric(1)), col = "black")
lines(Session_Starting_Year, vapply(cwts[1:22], n_communities, numeric(1)), col = "green")

# Senate networks: elements 23-44
# BUG FIX: the original Senate panel plotted cfgs[1:22] (House data) as its
# red fast-greedy series; it now plots cfgs[23:44] like the other series.
plot(Session_Starting_Year, vapply(cfgs[23:44], n_communities, numeric(1)), type = "l", col = "red", ylim = c(1,19), 
     main = 'Number of Unique Communities Detected by Various Clustering Algorithms for the Positive Relationship Networks of the U.S. Senate',
     ylab = 'Number of Unique Communities',
     xlab = 'Networks\nEach Network Corresponds to Congressional Session')
lines(Session_Starting_Year, vapply(cims[23:44], n_communities, numeric(1)), col = "dodgerblue")
lines(Session_Starting_Year, vapply(cls[23:44], n_communities, numeric(1)), col = "black")
lines(Session_Starting_Year, vapply(cwts[23:44], n_communities, numeric(1)), col = "green")

There is a decent amount of variation in just the number of communities detected by the various algorithms, especially for the House of Representatives (this makes sense, since there are 435 nodes compared to 100 for the Senate). For the polarization analyses to be robust, we would want these different clustering algorithms to detect similar communities.

Let’s look at how the different clustering algorithms look on an actual network - selecting network 31 because of the large spike in the number of communities in the walktrap algorithm

n <- 5
par(mfrow = c(2,2))

# Pick one layout, store its coordinates, and reuse them for every panel so
# each node sits in the same location in all four plots
coords <- layout_nicely(g_pos_networks_connected[[n]])

# One panel per algorithm: (clustering list, title, caption)
panels <- list(
  list(cl = cfgs, main = 'Cluster Fast Greedy - Positive Relationship Network',
       sub = 'This function tries to find dense subgraph,\nalso called communities in graphs via directly optimizing a modularity score.'),
  list(cl = cims, main = 'Infomap - Positive Relationship Network',
       sub = 'Find community structure that minimizes the expected\n description length of a random walker trajectory'),
  list(cl = cls, main = 'Louvain - Positive Relationship Network',
       sub = 'This function implements the multi-level modularity\noptimization algorithm for finding community structure.\nIt is based on the modularity measure and a hierarchial approach.'),
  list(cl = cwts, main = 'Walktrap - Positive Relationship Network',
       sub = 'This function tries to find densely connected subgraphs,\n also called communities in a graph via random walks.\nThe idea is that short random walks tend to stay in the same community.')
)

for (panel in panels) {
  plot(panel$cl[[n]], g_pos_networks_connected[[n]], layout = coords,
       vertex.label = vertex_attr(g_pos_networks_connected[[n]])$Party,
       main = panel$main,
       sub = panel$sub)
}

Let’s use a more precise method of comparing the different algorithms, the compare() method

# Which method should we choose for comparing?
# Try every method compare() offers on one network (element 31).
# NOTE(review): vi and split.join behave as distances (lower = more similar)
# while nmi/rand/adjusted.rand are similarities — confirm against igraph docs.
compare(cfgs[[31]], cls[[31]], method = 'vi')
## [1] 0.2963354
compare(cfgs[[31]], cls[[31]], method = 'nmi')
## [1] 0.8539984
compare(cfgs[[31]], cls[[31]], method = 'split.join')
## [1] 8
compare(cfgs[[31]], cls[[31]], method = 'rand')
## [1] 0.9466941
compare(cfgs[[31]], cls[[31]], method = 'adjusted.rand')
## [1] 0.8863841

Each method returns a different value, but we should compare these values on more than one network, and against each other over time, to see whether they are bounded between 0 and 1, among other properties.

# Trash?
# Scaffold of every (comparison, method) pairing repeated for all 44 networks
comparison_pairs <- c('cfg-cim', 'cfg-cl', 'cfg-cwt', 'cim-cl', 'cim-cwt', 'cl-cwt')
score_methods <- c('vi', 'nmi', 'split.join', 'rand', 'adjusted.rand')
Comparisons <- rep(comparison_pairs, times = 5 * 44)
Methods <- rep(rep(score_methods, each = length(comparison_pairs)), times = 44)

cluster_comparisons_df <- data.frame(Comparisons, Methods) %>%
  separate(Comparisons, c("C1", "C2"), remove = FALSE)

COMPUTE A COMPARISON SCORE FOR EACH ALGORITHM USING EACH DIFFERENT METHOD (COMPARING CLUSTER FAST GREEDY TO INFOMAP USING RAND, ADJUSTED RAND, VI, NMI, AND SPLIT.JOIN METHODS)

# Compute every pairwise comparison score, for every network, under every
# compare() method. final_X_Y[[i]] is a named list (method -> score) for
# network i, the same structure the original loop produced.
comparison_methods <- c('vi', 'nmi', 'split.join', 'rand', 'adjusted.rand')

# Score one pair of clusterings under all methods; returns a named list
score_pair <- function(a, b) {
  sapply(comparison_methods, function(m) compare(a, b, method = m),
         simplify = FALSE)
}

# Preallocate result lists instead of growing them inside the loop
final_cfgs_cims <- vector("list", 44)
final_cfgs_cls <- vector("list", 44)
final_cfgs_cwts <- vector("list", 44)
final_cims_cls <- vector("list", 44)
final_cims_cwts <- vector("list", 44)
final_cls_cwts <- vector("list", 44)

for (i in seq_len(44)) {
  final_cfgs_cims[[i]] <- score_pair(cfgs[[i]], cims[[i]])
  final_cfgs_cls[[i]] <- score_pair(cfgs[[i]], cls[[i]])
  final_cfgs_cwts[[i]] <- score_pair(cfgs[[i]], cwts[[i]])
  final_cims_cls[[i]] <- score_pair(cims[[i]], cls[[i]])
  final_cims_cwts[[i]] <- score_pair(cims[[i]], cwts[[i]])
  final_cls_cwts[[i]] <- score_pair(cls[[i]], cwts[[i]])
}

FOR EACH NETWORK, PLOT OF SCORES FOR A CERTAIN METHOD FOR EACH OF THE COMPARISONS (CLUSTER FAST GREEDY VS INOFMAP, INFOMAP VS WALKTRAP, ETC)

par(mfrow = c(5,1))

# Plot the six pairwise comparison-score series for one compare() method.
# Series order and colours match the original hand-written panels:
# cfgs-cims (black), cfgs-cls (red), cfgs-cwts (dodgerblue),
# cims-cls (green), cims-cwts (orange), cls-cwts (purple).
plot_method_scores <- function(method, title, ymax) {
  sessions <- 1:44
  series <- list(final_cfgs_cims, final_cfgs_cls, final_cfgs_cwts,
                 final_cims_cls, final_cims_cwts, final_cls_cwts)
  colours <- c("black", "red", "dodgerblue", "green", "orange", "purple")
  extract <- function(x) unlist(map(x, ~.[[method]]))
  plot(sessions, extract(series[[1]]), type = "l", ylab = 'Comparison Score',
       main = title, ylim = c(0, ymax))
  for (k in 2:6) {
    lines(sessions, extract(series[[k]]), col = colours[k])
  }
}

# One panel per method; ylim upper bounds kept from the original plots
plot_method_scores('rand', 'Comparison Scores using Rand Method', 1)
plot_method_scores('vi', 'Comparison Scores using VI Method', 2)
plot_method_scores('nmi', 'Comparison Scores using NMI Method', 1)
plot_method_scores('split.join', 'Comparison Scores using Split.Join Method', 305)
plot_method_scores('adjusted.rand', 'Comparison Scores using Adjusted Rand Method', 1)

CREATE DATAFRAMES THAT CONTAIN THE COMPARISON SCORES FOR EACH COMPARISON USING EACH METHOD

# Build one data frame per compare() method: one column per pairwise
# comparison, one row per network (44 rows).
method_scores_df <- function(method) {
  # Extract the scalar score for `method` from each network's score list
  pull <- function(scores) {
    vapply(scores, function(s) as.numeric(s[[method]]), numeric(1))
  }
  data.frame(cfgs_cims = pull(final_cfgs_cims),
             cfgs_cls = pull(final_cfgs_cls),
             cfgs_cwts = pull(final_cfgs_cwts),
             cims_cls = pull(final_cims_cls),
             cims_cwts = pull(final_cims_cwts),
             cls_cwts = pull(final_cls_cwts))
}

rand_df <- method_scores_df('rand')
vi_df <- method_scores_df('vi')
nmi_df <- method_scores_df('nmi')
splitjoin_df <- method_scores_df('split.join')
adjustedrand_df <- method_scores_df('adjusted.rand')

LEAST VARIATION IN A CORRELATION MATRIX CORRESPONDS TO THE METHOD WITH THE MOST CONSISTENT VALUE

# Spread of each method's correlation matrix, summarized as the total
# absolute covariance of the correlation matrix entries.
print(paste('Adjusted Rand:', sum(abs(cov(cor(adjustedrand_df))))))
## [1] "Adjusted Rand: 1.46100842933161"
print(paste('Split Join:', sum(abs(cov(cor(splitjoin_df))))))
## [1] "Split Join: 0.320237380753974"
print(paste('NMI:', sum(abs(cov(cor(nmi_df))))))
## [1] "NMI: 1.36836995915995"
print(paste('VI:', sum(abs(cov(cor(vi_df))))))
## [1] "VI: 0.322544225620801"
print(paste('Rand:', sum(abs(cov(cor(rand_df))))))
## [1] "Rand: 1.94991070968454"

# BUG FIX: the original second battery mixed two different statistics —
# mean(var(cor(df))) for Adjusted Rand / Split Join but
# mean(var(cov(cor(df)))) for NMI / VI / Rand — so its five values were not
# comparable, and the "VI has the least variation" conclusion drawn from
# them is suspect. All five now use mean(var(cor(df))) consistently;
# re-knit to refresh the printed values.
print(paste('Adjusted Rand:', mean(var(cor(adjustedrand_df)))))
print(paste('Split Join:', mean(var(cor(splitjoin_df)))))
print(paste('NMI:', mean(var(cor(nmi_df)))))
print(paste('VI:', mean(var(cor(vi_df)))))
print(paste('Rand:', mean(var(cor(rand_df)))))
#print(paste('Adjusted Rand:', sum(cor(adjustedrand_df))))
#print(paste('Split Join:', sum(cor(splitjoin_df))))
#print(paste('NMI:', sum(cor(nmi_df))))
#print(paste('VI:', sum(cor(vi_df))))
#print(paste('Rand:', sum(cor(rand_df))))

VI it is.

# Plot all compare scores on the 'VI' method
# First series drawn with plot() (default black), the rest layered with lines()
vi_rest <- list(final_cfgs_cls, final_cfgs_cwts, final_cims_cls,
                final_cims_cwts, final_cls_cwts)
vi_colours <- c("red", "dodgerblue", "green", "orange", "purple")

plot(1:44, map(final_cfgs_cims, ~.[["vi"]]), type = "l",
     ylab = 'Comparison Score', main = 'Comparison Scores using VI Method',
     ylim = c(0, 2))
for (k in seq_along(vi_rest)) {
  lines(1:44, map(vi_rest[[k]], ~.[["vi"]]), col = vi_colours[k])
}

NOW IDENTIFY WHICH ALGORITHM HAS THE HIGHEST (LOWEST) CORRELATION SCORE WHICH CORRESPONDS TO THE MOST (LEAST) ‘ALIKE’ OF THE CLUSTERING ALGORITHMS

# Correlation between the six pairwise-comparison score series under VI
cor(vi_df)
##           cfgs_cims  cfgs_cls cfgs_cwts  cims_cls cims_cwts  cls_cwts
## cfgs_cims 1.0000000 0.6687030 0.6791950 0.8135444 0.6346871 0.7237169
## cfgs_cls  0.6687030 1.0000000 0.7154159 0.6632902 0.6067799 0.8122829
## cfgs_cwts 0.6791950 0.7154159 1.0000000 0.5321228 0.7103841 0.8210753
## cims_cls  0.8135444 0.6632902 0.5321228 1.0000000 0.7047509 0.7927561
## cims_cwts 0.6346871 0.6067799 0.7103841 0.7047509 1.0000000 0.8301251
## cls_cwts  0.7237169 0.8122829 0.8210753 0.7927561 0.8301251 1.0000000
# Mean VI score per comparison across all 44 networks
lapply(vi_df, mean)
## $cfgs_cims
## [1] 0.5988712
## 
## $cfgs_cls
## [1] 0.5228651
## 
## $cfgs_cwts
## [1] 0.5597471
## 
## $cims_cls
## [1] 0.6596138
## 
## $cims_cwts
## [1] 0.5805344
## 
## $cls_cwts
## [1] 0.6021255
# Min-max normalize a numeric vector onto [0, 1].
# Computes range(x) once instead of calling min(x) twice.
# Note: a zero-range input (max == min) yields NaN, and any NA propagates —
# unchanged from the original behavior; callers here pass full score columns,
# which are non-degenerate.
range01 <- function(x) {
  rng <- range(x)
  (x - rng[1]) / (rng[2] - rng[1])
}

# Combine all scores dataframes: 5 methods x 44 networks = 220 rows
# NOTE(review): VI and split.join look distance-like (lower = more similar)
# while rand/nmi/adjusted.rand are similarities, so averaging normalized
# columns mixes directions — confirm this is intended.
scoresdf <- bind_rows(vi_df, nmi_df, rand_df, splitjoin_df, adjustedrand_df)

# Apply the normalizing function to each column, output is a list
meancomparescores <- lapply(scoresdf, range01)

# Mean of the 220 normalized scores for each of the choose(4, 2) = 6 comparisons
meancomparescores <- lapply(meancomparescores, mean)
                               
# Each algorithm's score = sum of the mean scores of the three comparisons
# in which it participates
cfgscore <- meancomparescores$cfgs_cims + meancomparescores$cfgs_cls + meancomparescores$cfgs_cwts
cimscore <- meancomparescores$cfgs_cims + meancomparescores$cims_cls + meancomparescores$cims_cwts
clscore <- meancomparescores$cfgs_cls + meancomparescores$cims_cls + meancomparescores$cls_cwts
cwtscore <- meancomparescores$cfgs_cwts + meancomparescores$cims_cwts + meancomparescores$cls_cwts
cfgscore
## [1] 0.1279143
cimscore
## [1] 0.1416928
clscore 
## [1] 0.1315846
cwtscore
## [1] 0.1299602

Cluster_Infomap has the highest average comparison score, but in the first network it only detects one community. More importantly, the comparison scores of all clustering algorithms are clustered (pun intended) between .127 and .141 so choosing any one of them is probably fine.

LET’S SEE HOW THE DIFFERENT CLUSTERING ALGORITHMS AFFECT THE CHART THAT COMPARES THE % OF D AND % OF R IN THE TOP TWO COMMUNITIES FOR EACH NETWORK

# Run community detection on connected blocks
# Preallocate one list per algorithm (the original grew c() objects in-loop)
n_connected <- length(g_pos_networks_connected)
connected_blocks_pos_cfg <- vector("list", n_connected)
connected_blocks_pos_infomap <- vector("list", n_connected)
connected_blocks_pos_louvain <- vector("list", n_connected)
connected_blocks_pos_walktrap <- vector("list", n_connected)

for (i in seq_len(n_connected)){
  connected_blocks_pos_cfg[[i]] <- cluster_fast_greedy(g_pos_networks_connected[[i]])
  connected_blocks_pos_infomap[[i]] <- cluster_infomap(g_pos_networks_connected[[i]])
  connected_blocks_pos_louvain[[i]] <- cluster_louvain(g_pos_networks_connected[[i]])
  connected_blocks_pos_walktrap[[i]] <- cluster_walktrap(g_pos_networks_connected[[i]])
}

# Sanity-check plot of the infomap communities on the first network
plot(connected_blocks_pos_infomap[[1]], g_pos_networks_connected[[1]], vertex.label = vertex_attr(g_pos_networks_connected[[1]])$Party, main = 'Positive Relationship - Clusters')

# Party-by-community contingency tables for each network and each algorithm.
# Preallocated, and iterating seq_along(...) instead of a hard-coded 1:44.
party_community_tables_cfg <- vector("list", length(g_pos_networks_connected))
party_community_tables_infomap <- vector("list", length(g_pos_networks_connected))
party_community_tables_louvain <- vector("list", length(g_pos_networks_connected))
party_community_tables_walktrap <- vector("list", length(g_pos_networks_connected))

for (i in seq_along(g_pos_networks_connected)){
  party_community_tables_cfg[[i]] <- table(vertex_attr(g_pos_networks_connected[[i]])$Party, connected_blocks_pos_cfg[[i]]$membership)
  party_community_tables_infomap[[i]] <- table(vertex_attr(g_pos_networks_connected[[i]])$Party, connected_blocks_pos_infomap[[i]]$membership)
  party_community_tables_louvain[[i]] <- table(vertex_attr(g_pos_networks_connected[[i]])$Party, connected_blocks_pos_louvain[[i]]$membership)
  party_community_tables_walktrap[[i]] <- table(vertex_attr(g_pos_networks_connected[[i]])$Party, connected_blocks_pos_walktrap[[i]]$membership)
}
# List of dataframes showing the number of Ds and Rs in the two largest communities
d_r_prop_tables <- vector("list", length(party_community_tables_walktrap))

# Given a party-by-community contingency table: keep the two largest
# communities (columns), relabel them 1 and 2, and return a data frame of
# D/R counts, their proportions, and each community's dominant party.
calculate_largest_communities <- function(tables){
  # Keep the two communities with the most members.
  # (The original used names(sort(desc(colSums(prop.table(tables))))), which
  # relies on desc()/xtfrm preserving names; base sort(decreasing = TRUE) is
  # equivalent and unambiguous, and prop.table does not change the ordering.)
  top_two <- names(sort(colSums(tables), decreasing = TRUE))[1:2]
  tables <- tables[, top_two]
  colnames(tables) <- c(1, 2)
  
  # Dominant party of each community = row with the largest count in that column
  mainparty_community1 <- names(sort(tables[,1], decreasing = TRUE))[1]
  mainparty_community2 <- names(sort(tables[,2], decreasing = TRUE))[1]

  # Long-form data frame: Var1 = party, Var2 = community, Freq = count
  table_dfs <- as.data.frame(tables)
  
  # Keep only Democrat and Republican members
  table_dfs <- table_dfs %>% subset(Var1 %in% c('D', 'R'))
  table_dfs$Var1 <- factor(table_dfs$Var1)
  
  # Proportion of the retained (D + R) members in each party-community cell
  table_dfs <- table_dfs %>% mutate(Prop = round(Freq/sum(Freq), 3))
  table_dfs$MainPartyCommunity <- '.'
  table_dfs$MainPartyCommunity[table_dfs$Var2 == 1] <- mainparty_community1
  table_dfs$MainPartyCommunity[table_dfs$Var2 == 2] <- mainparty_community2
  return(table_dfs)
}

for (i in seq_along(party_community_tables_walktrap)){
  d_r_prop_tables[[i]] <- calculate_largest_communities(party_community_tables_walktrap[[i]])
}

# Convert list of dataframes into 1 dataframe and include .id = 'Session' so we know what list (session) the data is from
party_community_df <- bind_rows(d_r_prop_tables, .id = 'Session')
# Within each session-community pair, dom = the largest party count
party_community_df <- party_community_df %>% group_by(Session, Var2) %>% mutate(dom = max(Freq))

# Label each row with the dominant party of its session-community ('ignore'
# for the non-dominant row), so communities can be identified as D- or
# R-dominated instead of by the arbitrary labels 1 and 2
party_community_df <- party_community_df %>% group_by(Session, Var2) %>% mutate(mainparty = ifelse(dom == Freq, as.character(Var1), 'ignore'))
party_community_df <- party_community_df %>% group_by(Session, Var2) %>% mutate(Percentage = Freq/sum(Freq))

# Change Factors to Character, Character to Numeric for area plotting
party_community_df$Session <- as.numeric(party_community_df$Session)
party_community_df$Var1 <- as.character(party_community_df$Var1)

# Ggplot was having issues with the tibble, so converting to dataframe
# (note: subsequent group_by calls re-create a grouped tibble)
party_community_df <- data.frame(party_community_df)

# Changing Variable Names and Values for interpretation purposes
names(party_community_df)[names(party_community_df) == "Var1"] <- "Political Party"
party_community_df$`Political Party`[party_community_df$`Political Party` == 'D'] <- 'Democrat'
party_community_df$`Political Party`[party_community_df$`Political Party` == 'R'] <- 'Republican'

# Remove sessions in which one party dominated both communities:
# per session, mainparty takes values from {dominant parties, 'ignore'}, so
# a session with two different dominant parties has 3 distinct values while
# a single-party-dominated session has 2; `> 2` keeps the two-party sessions
party_community_df <- party_community_df %>% group_by(Session) %>% mutate('1partydom' = length(unique(mainparty)))
party_community_df <- party_community_df %>% group_by(Session) %>% subset(`1partydom` > 2)
# Show Percentage of Ds and Rs in the House for the D-dominated Community and R-dominated Community Over time. Ignoring session 2 because both communities were D-dominated
#ggplot(party_community_df[party_community_df$Session %in% c(1, 3:22), ], aes(x = Session, y = Percentage, fill = `Political Party`)) +
# Sessions 1-22 are the House networks
ggplot(party_community_df[party_community_df$Session <= 22, ], aes(x = Session, y = Percentage, fill = `Political Party`)) +
  geom_area(alpha=0.85, size = 1) + 
  
  # One panel per dominant-party community, with readable panel titles
  facet_wrap(~MainPartyCommunity, 
             labeller = as_labeller(c('D' = 'Democrat-Dominated Community', 'R' = 'Republican-Dominated Community'))) + 
  
  # Blue = Democrat, red = Republican (alphabetical fill order)
  scale_fill_manual(values = c("blue", "red")) + 
  
  ggtitle('Trends in Polarization in the U.S. House of Representatives\nEvolution of the Distribution of Political Party Membership in Detected Communities') +
  
  ylab("Percentage of Total Community Membership by Party") +
  
  xlab("Congressional Session") +
  
  theme_economist() +
  
  scale_color_economist() +
  
  # theme() must come after theme_economist() so these overrides win
  theme(axis.title.y = element_text(size = 16, angle = 90), 
        axis.text.y = element_text(size = 13, margin = margin(r = 0)),
        axis.title.x = element_text(size = 16),
        axis.text.x = element_text(size = 13),
        legend.title = element_text(size = 16),
        legend.box.margin = margin(6),
        legend.background = element_rect(fill = "lightgray"),
        plot.title = element_text(hjust = 0.5),
        panel.grid.major = element_blank(),
        panel.spacing = unit(0, "mm"))

# Show Percentage of Ds and Rs in the Senate for the D-dominated Community and R-dominated Community Over time. Ignoring session 26 because both communities were D-dominated
# Sessions 23-44 are the Senate networks
ggplot(party_community_df[party_community_df$Session >= 23, ], aes(x = Session, y = Percentage, fill = `Political Party`)) + 
  
  geom_area(alpha = .85, size = 1) +
  
  # BUG FIX: the original added facet_wrap(~MainPartyCommunity) twice; the
  # first (unlabelled) call was dead code, immediately overridden by this one
  facet_wrap(~MainPartyCommunity, 
             labeller = as_labeller(c('D' = 'Democrat-Dominated Community', 'R' = 'Republican-Dominated Community'))) + 
  
  # Blue = Democrat, red = Republican (alphabetical fill order)
  scale_fill_manual(values = c("blue", "red")) + 
  
  ggtitle('Trends in Polarization in the U.S. Senate\nEvolution of the Distribution of Political Party Membership in Detected Communities') +
  
  ylab("Percentage of Total Community Membership by Party") +
  
  xlab("Congressional Session") +
  
  theme_economist() +
  
  scale_color_economist() +
  
  # theme() must come after theme_economist() so these overrides win
  theme(axis.title.y = element_text(size = 16, angle = 90), 
        axis.text.y = element_text(size = 13, margin = margin(r = 0)),
        axis.title.x = element_text(size = 16),
        axis.text.x = element_text(size = 13),
        legend.title = element_text(size = 16),
        legend.box.margin = margin(6),
        legend.background = element_rect(fill = "lightgray"),
        plot.title = element_text(hjust = 0.5),
        panel.grid.major = element_blank(),
        panel.spacing = unit(0, "mm"))

#Constructing Complete Network ——

CONSTRUCTING COMPLETE NETWORK TO SEE IF BEING IN A NETWORK FOR A LONGER TIME HAS AN IMPACT ON LIKELIHOOD TO FORM POSITIVE RELATIONSHIP

# Need to make these graphs weighted because multiple connections are being collapsed into one (maybe use degree)
# NOTE(review): union() resolves to igraph's method here (it masks base::union
# per the package-attach messages). Passing a list of graphs as the second
# argument relies on igraph accepting graph lists — confirm; the commented
# session-by-session block below is the manual equivalent kept from debugging.
entire_senate_pos_network <- union(g_pos_networks[[23]], g_pos_networks[24:44])
entire_senate_neg_network <- union(g_neg_networks[[23]], g_neg_networks[24:44])
entire_house_pos_network <- union(g_pos_networks[[1]], g_pos_networks[2:22])
entire_house_neg_network <- union(g_neg_networks[[1]], g_neg_networks[2:22])

#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[25]])
#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[26]])
#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[27]])
#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[28]])
#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[29]])
#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[30]])
#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[31]])
#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[32]])
#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[33]])
#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[34]])
#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[35]])
#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[36]])
#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[37]])
#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[38]])
#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[39]])
#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[40]])
#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[41]])
#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[42]])
#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[43]])
#print(length(V(entire_senate_pos_network)))
#entire_senate_pos_network <- union(entire_senate_pos_network, g_pos_networks[[44]])
#print(length(V(entire_senate_pos_network)))
# Network of every single possible actor (both chambers, both edge signs)
# to see if it is a connected network
meta <- union(entire_house_pos_network, entire_senate_pos_network)
meta <- union(meta, entire_house_neg_network)
meta <- union(meta, entire_senate_neg_network)
print(is_connected(meta))
## [1] TRUE

Interesting that everyone in the network (across time and legislative bodies) is connected

# Average shortest-path length across the combined network
mean_distance(meta, directed = FALSE)
## [1] 1.863896
# Counts of vertex pairs at each geodesic distance; $unconnected is 0,
# consistent with is_connected(meta) above
distance_table(meta)
## $res
## [1]  517620 1647548  188350    4188
## 
## $unconnected
## [1] 0
# Circular tree layout of the collapsed Senate positive network
# (style note: circular=T uses the reassignable shorthand T; prefer TRUE)
plot(entire_senate_pos_network, vertex.label = NA, layout = layout.reingold.tilford(entire_senate_pos_network, circular=T), vertex.size = 1)

CREATE A DATAFRAME OF EVERY SENATE/CONGRESS FOR SIMILAR REASONS TO THE ANALYSIS ABOVE WE CAN COUNT HOW MANY TIMES A REP APPEARS IN THE DATAFRAME TO KNOW HOW MANY SESSIONS THEY SERVED

# Use the list of dataframes created when originally reading in the data,
# keeping only the first four columns (member identifiers: name, state,
# party — confirm the fourth column against the raw files)
df <- lapply(df, function(session_df) session_df[, 1:4])

# Bind all the dataframes together
all_sessions_df <- bind_rows(df)

# Number of members in each session's roster (used to expand session labels)
membs <- vapply(df, nrow, integer(1))

# Create a vector of session numbers: 93-114 once per chamber
seshs <- rep(93:114, 2)

# Create a vector of house types (House vs Senate), 22 sessions each
type <- rep(c('H','S'), each = 22)

# Combined chamber + session label, e.g. "H 93"
sesh_type <- paste(type,seshs)

# Label every roster row with its chamber-session
all_sessions_df$Sesh_type <- rep(sesh_type, membs)

# Tenure = number of sessions in which the member (keyed by Covariates) appears
all_sessions_df <- all_sessions_df %>% group_by(Covariates) %>% mutate('Tenure' = n())

USING THE ‘ENTIRE’ NETWORKS CREATED EARLIER, ADD THE NUMBER OF POSITIVE/NEGATIVE CONNECTIONS FOR THE SENATE AND HOUSE

# Get the degree for both networks (positive and negative) for both the Senate and House 'entire' networks
# The degrees accumulate from each network (if someone served 2 terms as a senator and had 10 positive connections in one session and 15 in another, their degree in the all_sessions network would be 25)
senate_pos_connections <- degree(entire_senate_pos_network)
senate_neg_connections <- degree(entire_senate_neg_network)
house_pos_connections <- degree(entire_house_pos_network)
house_neg_connections <- degree(entire_house_neg_network)

# Look up each member's degree by vertex name; members absent from a given
# network get NA, exactly as the repeated match() calls produced before
degree_for_members <- function(deg) {
  deg[match(all_sessions_df$Covariates, names(deg))]
}

all_sessions_df$Total_Unique_Senate_Pos_Connections <- degree_for_members(senate_pos_connections)
all_sessions_df$Total_Unique_Senate_Neg_Connections <- degree_for_members(senate_neg_connections)
all_sessions_df$Total_Unique_House_Pos_Connections <- degree_for_members(house_pos_connections)
all_sessions_df$Total_Unique_House_Neg_Connections <- degree_for_members(house_neg_connections)
# Group by individuals and:

# keep their tenure, total positive connections, and party (each rep has only served in 1 party - this was checked using all_sessions_df %>% group_by(Covariates) %>% summarize('Parties' = n_distinct(Party)) %>% arrange(Parties))
# mean() here just collapses a per-session-constant value to itself
all_sessions_df_tspc <- all_sessions_df %>% group_by(Covariates) %>% summarize(tspc = mean(Total_Unique_Senate_Pos_Connections, na.rm = TRUE), tenure = mean(Tenure), party = first(Party))
## `summarise()` ungrouping output (override with `.groups` argument)
# Drop NA rows — presumably members who never appear in the Senate networks; confirm
all_sessions_df_tspc <- na.omit(all_sessions_df_tspc)

# keep their tenure, total negative connections, and party
all_sessions_df_tsnc <- all_sessions_df %>% group_by(Covariates) %>% summarize(tsnc = mean(Total_Unique_Senate_Neg_Connections, na.rm = TRUE), tenure = mean(Tenure), party = first(Party))
## `summarise()` ungrouping output (override with `.groups` argument)
all_sessions_df_tsnc <- na.omit(all_sessions_df_tsnc)

CALCULATE DEGREE PER SESSION TO SEE IF THE ALL_SESSIONS_DF IS JUST LOOKING AT JUST UNIQUE EDGES OR TOTAL EDGES

# Per-session degree vector for every positive and every negative network.
# lapply replaces the original grow-in-loop construction; the resulting
# lists are identical.
all_pos_degrees <- lapply(g_pos_networks, degree)
all_neg_degrees <- lapply(g_neg_networks, degree)

# Pull out 1 senator to compare degrees per session to the 'Senate_Pos_Connections' in the all_sessions_df dataframe
ewar_degrees <- c()

# Indexing each session's named degree vector by her vertex name;
# sessions where she is absent index to NA (House networks, earlier Senates)
for (i in seq_along(all_pos_degrees)){
  ewar_degrees[[i]] <- all_pos_degrees[[i]]['Warren, E. (MA-D)']
}
# Her degrees in the last two Senate networks (elements 43 and 44)
ewar_degrees[43:44]
## [[1]]
## Warren, E. (MA-D) 
##                39 
## 
## [[2]]
## Warren, E. (MA-D) 
##                33
# Cross-check against the collapsed-network totals stored on her roster rows
all_sessions_df %>%  filter(Covariates == 'Warren, E. (MA-D)')
## # A tibble: 2 x 10
## # Groups:   Covariates [1]
##   Covariates Name  State Party Sesh_type Tenure Total_Unique_Se~
##   <chr>      <chr> <chr> <chr> <chr>      <int>            <dbl>
## 1 Warren, E~ "War~ MA    D     S 113          2               41
## 2 Warren, E~ "War~ MA    D     S 114          2               41
## # ... with 3 more variables: Total_Unique_Senate_Neg_Connections <dbl>,
## #   Total_Unique_House_Pos_Connections <dbl>,
## #   Total_Unique_House_Neg_Connections <dbl>

These must be unique connections, because Elizabeth Warren has 39 positive connections in one session and 33 in another, but only 41 for the total senate positive connections

Let’s add the total number of positive connections in each session

# Create a dataframe of each rep for each session and their degree.

# Flatten the per-session degree lists into single named vectors; the element
# names carry the vertex labels (Name, party, state).
all_pos_degrees <- unlist(all_pos_degrees, recursive = TRUE, use.names = TRUE)
all_neg_degrees <- unlist(all_neg_degrees, recursive = TRUE, use.names = TRUE)

# Turn each flattened vector into a dataframe, then copy the vertex names
# into a Covariates column.
all_pos_degrees_df <- as.data.frame(all_pos_degrees)
all_pos_degrees_df$Covariates <- names(all_pos_degrees)

all_neg_degrees_df <- as.data.frame(all_neg_degrees)
all_neg_degrees_df$Covariates <- names(all_neg_degrees)

# Attach the per-session degree counts to the session-level dataframe.
# NOTE(review): this relies on the rows of the degree dataframes lining up
# positionally with all_sessions_df — confirm the ordering assumption holds.
all_sessions_df$Pos_Connections <- all_pos_degrees_df$all_pos_degrees
all_sessions_df$Neg_Connections <- all_neg_degrees_df$all_neg_degrees

# Quick side-by-side look at connections vs tenure
par(mfrow = c(1, 2))
plot(all_sessions_df$Tenure, all_sessions_df$Pos_Connections)
plot(all_sessions_df$Tenure, all_sessions_df$Neg_Connections)

Tenure excludes data before 1973 (if a Representative served in the 90th–94th Sessions, our data would only show a tenure of 2 because we start at the 93rd Session).

# Model positive connections as a function of tenure plus a fixed effect for
# each chamber-session label (Sesh_type, e.g. "S 113" / "H 93").
# NOTE(review): pulling columns with `$` inside the formula works, but
# `lm(Pos_Connections ~ Tenure + Sesh_type, data = all_sessions_df)` would be
# the idiomatic form and would give much shorter coefficient names.
lm(all_sessions_df$Pos_Connections ~ all_sessions_df$Tenure + all_sessions_df$Sesh_type)
## 
## Call:
## lm(formula = all_sessions_df$Pos_Connections ~ all_sessions_df$Tenure + 
##     all_sessions_df$Sesh_type)
## 
## Coefficients:
##                    (Intercept)          all_sessions_df$Tenure  
##                        63.6222                         -0.1301  
## all_sessions_df$Sesh_typeH 101  all_sessions_df$Sesh_typeH 102  
##                        12.6359                         19.7125  
## all_sessions_df$Sesh_typeH 103  all_sessions_df$Sesh_typeH 104  
##                        21.9602                         16.8118  
## all_sessions_df$Sesh_typeH 105  all_sessions_df$Sesh_typeH 106  
##                        29.3099                         32.2093  
## all_sessions_df$Sesh_typeH 107  all_sessions_df$Sesh_typeH 108  
##                        29.8245                         36.1628  
## all_sessions_df$Sesh_typeH 109  all_sessions_df$Sesh_typeH 110  
##                        33.2253                         41.5415  
## all_sessions_df$Sesh_typeH 111  all_sessions_df$Sesh_typeH 112  
##                        37.2340                         56.4137  
## all_sessions_df$Sesh_typeH 113  all_sessions_df$Sesh_typeH 114  
##                        60.4736                         56.2880  
##  all_sessions_df$Sesh_typeH 93   all_sessions_df$Sesh_typeH 94  
##                       -27.0230                        -25.7553  
##  all_sessions_df$Sesh_typeH 95   all_sessions_df$Sesh_typeH 96  
##                       -24.0898                        -26.4635  
##  all_sessions_df$Sesh_typeH 97   all_sessions_df$Sesh_typeH 98  
##                       -21.9836                         -7.7706  
##  all_sessions_df$Sesh_typeH 99  all_sessions_df$Sesh_typeS 100  
##                         1.0303                        -50.0543  
## all_sessions_df$Sesh_typeS 101  all_sessions_df$Sesh_typeS 102  
##                       -46.5844                        -48.0874  
## all_sessions_df$Sesh_typeS 103  all_sessions_df$Sesh_typeS 104  
##                       -49.6447                        -50.0478  
## all_sessions_df$Sesh_typeS 105  all_sessions_df$Sesh_typeS 106  
##                       -44.1836                        -45.8866  
## all_sessions_df$Sesh_typeS 107  all_sessions_df$Sesh_typeS 108  
##                       -48.6012                        -49.4653  
## all_sessions_df$Sesh_typeS 109  all_sessions_df$Sesh_typeS 110  
##                       -46.7075                        -45.4956  
## all_sessions_df$Sesh_typeS 111  all_sessions_df$Sesh_typeS 112  
##                       -44.7569                        -38.0871  
## all_sessions_df$Sesh_typeS 113  all_sessions_df$Sesh_typeS 114  
##                       -36.1884                        -33.8563  
##  all_sessions_df$Sesh_typeS 93   all_sessions_df$Sesh_typeS 94  
##                       -51.3802                        -52.4067  
##  all_sessions_df$Sesh_typeS 95   all_sessions_df$Sesh_typeS 96  
##                       -54.1386                        -54.3989  
##  all_sessions_df$Sesh_typeS 97   all_sessions_df$Sesh_typeS 98  
##                       -53.7112                        -52.0169  
##  all_sessions_df$Sesh_typeS 99  
##                       -51.6017
# Scatter total unique positive connections against tenure, colored by party,
# with a smoothed trend per party.
ggplot(all_sessions_df_tspc, aes(x = tenure, y = tspc, color = party)) +
  geom_point() +
  geom_smooth() +
  scale_color_manual(values = c("white", "blue", "purple", "red")) +
  labs(
    title = "Comparing Total Unique Positive Connections to Tenure",
    x = "Tenure",
    y = "Total Unique Positive Connections"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Same view for negative connections: total unique negative connections vs
# tenure, colored by party, with a smoothed trend per party.
ggplot(all_sessions_df_tsnc, aes(x = tenure, y = tsnc, color = party)) +
  geom_point() +
  geom_smooth() +
  scale_color_manual(values = c("white", "blue", "purple", "red")) +
  labs(
    title = "Comparing Total Unique Negative Connections to Tenure",
    x = "Tenure",
    y = "Total Unique Negative Connections"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Regress mean unique positive connections on tenure.
# FIX: the original mixed dataframes — the response came from
# all_sessions_df_tspc but the regressor from all_sessions_df_tsnc (a
# copy-paste slip). Both frames are grouped the same way so the fit matches
# when their rows align, but sourcing both variables from one dataframe via
# `data =` is the correct, robust form.
lm(tspc ~ tenure, data = all_sessions_df_tspc)
## 
## Call:
## lm(formula = all_sessions_df_tspc$tspc ~ all_sessions_df_tsnc$tenure)
## 
## Coefficients:
##                 (Intercept)  all_sessions_df_tsnc$tenure  
##                      15.590                        3.477
# Per-session positive connections vs tenure across all parties (six party
# codes appear in the data, hence six manual colors).
ggplot(all_sessions_df, aes(x = Tenure, y = Pos_Connections, color = Party)) +
  geom_point() +
  geom_smooth() +
  scale_color_manual(values = c("white", "blue", "purple", "red", "green", "black")) +
  labs(
    title = "Comparing Total Positive Connections to Tenure",
    x = "Tenure",
    y = "Total Positive Connections"
  )
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Computation failed in `stat_smooth()`:
## x has insufficient unique values to support 10 knots: reduce k.

# Per-session negative connections vs tenure across all parties.
ggplot(all_sessions_df, aes(x = Tenure, y = Neg_Connections, color = Party)) +
  geom_point() +
  geom_smooth() +
  scale_color_manual(values = c("white", "blue", "purple", "red", "green", "black")) +
  labs(
    title = "Comparing Total Negative Connections to Tenure",
    x = "Tenure",
    y = "Total Negative Connections"
  )
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Computation failed in `stat_smooth()`:
## x has insufficient unique values to support 10 knots: reduce k.

#all_sessions_df %>%  filter(Covariates == 'Cochran, T. (MS-R)')
# Sanity check: how many distinct party labels does each legislator carry?
all_sessions_df %>%
  group_by(Covariates) %>%
  summarize(Parties = n_distinct(Party)) %>%
  arrange(Parties)
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2,170 x 2
##    Covariates               Parties
##    <chr>                      <int>
##  1 Abdnor, J. (SD-R)              1
##  2 Abercrombie, N. (HI-D)         1
##  3 Abourezk, J. (SD-D)            1
##  4 Abraham, R. (LA-R)             1
##  5 Abraham, S. (MI-R)             1
##  6 Abzug, B. (NY-D)               1
##  7 Acevedo-Vilv°, A. (PR-P)       1
##  8 Ackerman, G. (NY-D)            1
##  9 Adams, A. (NC-D)               1
## 10 Adams, B. (WA-D)               1
## # ... with 2,160 more rows

#Unused Code/Garbage Can ——

# Function reads in a table with columns as communities and parties as rows.
# Turns the table into a long dataframe (wide to skinny) with 3 columns where
# Var1 is Party, Var2 is Community, and Freq is the count of people of a
# certain party in that community.
#
# Returns the long dataframe restricted to parties 'D' and 'R' and to the two
# communities with the largest total membership.
calculate_largest_communities <- function(tables){
  # Long format: one row per (party, community) pair
  table_dfs <- as.data.frame(tables)

  # Repeatedly drop the community with the smallest total membership until
  # only the two largest remain.
  # FIX: the original unrolled this step three times with copy-pasted code,
  # so it silently mishandled inputs with more than five communities; the
  # while loop handles any number of communities. (which.min keeps the
  # original first-on-tie behavior.)
  while (length(unique(table_dfs$Var2)) > 2) {
    smallest <- table_dfs %>%
      group_by(Var2) %>%
      summarize(Total = sum(Freq), .groups = 'drop') %>%
      slice(which.min(Total))
    table_dfs <- table_dfs %>% filter(Var2 != smallest$Var2)
  }

  # Keep only the two major parties
  table_dfs %>% subset(Var1 %in% c('D', 'R'))
}
# Draw a stacked party-by-community bar chart for each of the 18 sessions.
# FIX: ggplot objects are only rendered when printed; inside a for loop
# auto-printing is suppressed, so the original loop drew nothing. Wrapping
# the plot in print() makes each chart actually appear.
for (i in 1:18) {
  print(
    ggplot(d_r_prop_tables[[i]], aes(fill = Var1, y = Freq, x = Var2)) +
      geom_bar(position = "stack", stat = "identity")
  )
}
# Or using the data table.
# Given a Party-by-Community contingency table, keep the two communities
# (columns) with the largest overall membership share, print them and the
# dominant party of each, and return the dominant party of community 1.
# NOTE(review): mainparty_community2 is computed and printed but never
# returned — confirm whether callers also need it.
calcparties <- function(tables){
     # FIX: replaced the sort(desc(...)) trick (which leaned on dplyr::desc
     # and negated values) with the idiomatic sort(..., decreasing = TRUE);
     # this also makes the function pure base R.
     top2 <- names(sort(colSums(prop.table(tables)), decreasing = TRUE))[1:2]
     tables <- tables[, top2]
     colnames(tables) <- c(1, 2)

     print(tables)

     # Dominant party = row with the largest count in each community column
     mainparty_community1 <- names(sort(tables[, 1], decreasing = TRUE))[1]
     mainparty_community2 <- names(sort(tables[, 2], decreasing = TRUE))[1]

     print(mainparty_community1)
     print(mainparty_community2)

     return(mainparty_community1)}
# Spot-check: dominant party of each top community in the 2nd session's table
calcparties(party_community_tables[[2]])
##    
##       1   2
##   D 124 133
##   I   1   0
##   P   0   1
##   R 118  14
## [1] "D"
## [1] "D"
## [1] "D"

RETHINKING SBM MODEL DUE TO DEGREE DISTRIBUTIONS

# Examine the degree distribution of the network graphs, split by party:
# 2x2 grid of histograms (positive/negative network x Democrat/Republican).
# FIX: the Republican negative-connections panel masked degree(g_neg) with
# the Party attribute of g_pos (copy-paste bug); it now uses g_neg's own
# attribute. Also removed the stray empty `breaks` argument from each call.

par(mfrow = c(2, 2))

hist(degree(g_pos)[vertex_attr(g_pos)$Party == 'D'], xlab = 'Degree',
     main = 'Degree Distribution Positive Connections - Democratic Party', col = 'blue')

hist(degree(g_pos)[vertex_attr(g_pos)$Party == 'R'], xlab = 'Degree',
     main = 'Degree Distribution Positive Connections - Republican Party', col = 'red')

hist(degree(g_neg)[vertex_attr(g_neg)$Party == 'D'], xlab = 'Degree',
     main = 'Degree Distribution Negative Connections - Democratic Party', col = 'blue')

hist(degree(g_neg)[vertex_attr(g_neg)$Party == 'R'], xlab = 'Degree',
     main = 'Degree Distribution Negative Connections - Republican Party', col = 'red')

Similar degree distributions when comparing parties on the same type of network (positive vs negative), although the distribution is varied enough that we should consider a degree-corrected stochastic block model.